1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/Bitcode/BitcodeReader.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DerivedTypes.h" 36 #include "llvm/IR/GlobalValue.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/Support/AtomicOrdering.h" 39 #include "llvm/Support/Format.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cassert> 42 #include <numeric> 43 44 using namespace clang; 45 using namespace CodeGen; 46 using namespace llvm::omp; 47 48 namespace { 49 /// Base class for handling code generation inside OpenMP regions. 50 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 51 public: 52 /// Kinds of OpenMP regions used in codegen. 
53 enum CGOpenMPRegionKind { 54 /// Region with outlined function for standalone 'parallel' 55 /// directive. 56 ParallelOutlinedRegion, 57 /// Region with outlined function for standalone 'task' directive. 58 TaskOutlinedRegion, 59 /// Region for constructs that do not require function outlining, 60 /// like 'for', 'sections', 'atomic' etc. directives. 61 InlinedRegion, 62 /// Region with outlined function for standalone 'target' directive. 63 TargetRegion, 64 }; 65 66 CGOpenMPRegionInfo(const CapturedStmt &CS, 67 const CGOpenMPRegionKind RegionKind, 68 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 69 bool HasCancel) 70 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 71 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 72 73 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 75 bool HasCancel) 76 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 77 Kind(Kind), HasCancel(HasCancel) {} 78 79 /// Get a variable or parameter for storing global thread id 80 /// inside OpenMP construct. 81 virtual const VarDecl *getThreadIDVariable() const = 0; 82 83 /// Emit the captured statement body. 84 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 85 86 /// Get an LValue for the current ThreadID variable. 87 /// \return LValue for thread id variable. This LValue always has type int32*. 
88 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 89 90 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 91 92 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 93 94 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 95 96 bool hasCancel() const { return HasCancel; } 97 98 static bool classof(const CGCapturedStmtInfo *Info) { 99 return Info->getKind() == CR_OpenMP; 100 } 101 102 ~CGOpenMPRegionInfo() override = default; 103 104 protected: 105 CGOpenMPRegionKind RegionKind; 106 RegionCodeGenTy CodeGen; 107 OpenMPDirectiveKind Kind; 108 bool HasCancel; 109 }; 110 111 /// API for captured statement code generation in OpenMP constructs. 112 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 113 public: 114 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 115 const RegionCodeGenTy &CodeGen, 116 OpenMPDirectiveKind Kind, bool HasCancel, 117 StringRef HelperName) 118 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 119 HasCancel), 120 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 121 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 122 } 123 124 /// Get a variable or parameter for storing global thread id 125 /// inside OpenMP construct. 126 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 127 128 /// Get the name of the capture helper. 129 StringRef getHelperName() const override { return HelperName; } 130 131 static bool classof(const CGCapturedStmtInfo *Info) { 132 return CGOpenMPRegionInfo::classof(Info) && 133 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 134 ParallelOutlinedRegion; 135 } 136 137 private: 138 /// A variable or parameter storing global thread id for OpenMP 139 /// constructs. 140 const VarDecl *ThreadIDVar; 141 StringRef HelperName; 142 }; 143 144 /// API for captured statement code generation in OpenMP constructs. 
145 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 146 public: 147 class UntiedTaskActionTy final : public PrePostActionTy { 148 bool Untied; 149 const VarDecl *PartIDVar; 150 const RegionCodeGenTy UntiedCodeGen; 151 llvm::SwitchInst *UntiedSwitch = nullptr; 152 153 public: 154 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 155 const RegionCodeGenTy &UntiedCodeGen) 156 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 157 void Enter(CodeGenFunction &CGF) override { 158 if (Untied) { 159 // Emit task switching point. 160 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 161 CGF.GetAddrOfLocalVar(PartIDVar), 162 PartIDVar->getType()->castAs<PointerType>()); 163 llvm::Value *Res = 164 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 165 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 166 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 167 CGF.EmitBlock(DoneBB); 168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 170 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 171 CGF.Builder.GetInsertBlock()); 172 emitUntiedSwitch(CGF); 173 } 174 } 175 void emitUntiedSwitch(CodeGenFunction &CGF) const { 176 if (Untied) { 177 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 178 CGF.GetAddrOfLocalVar(PartIDVar), 179 PartIDVar->getType()->castAs<PointerType>()); 180 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 181 PartIdLVal); 182 UntiedCodeGen(CGF); 183 CodeGenFunction::JumpDest CurPoint = 184 CGF.getJumpDestInCurrentScope(".untied.next."); 185 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 186 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 187 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 188 CGF.Builder.GetInsertBlock()); 189 CGF.EmitBranchThroughCleanup(CurPoint); 190 CGF.EmitBlock(CurPoint.getBlock()); 191 } 192 } 193 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 194 }; 195 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 196 const VarDecl *ThreadIDVar, 197 const RegionCodeGenTy &CodeGen, 198 OpenMPDirectiveKind Kind, bool HasCancel, 199 const UntiedTaskActionTy &Action) 200 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 201 ThreadIDVar(ThreadIDVar), Action(Action) { 202 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 203 } 204 205 /// Get a variable or parameter for storing global thread id 206 /// inside OpenMP construct. 207 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 208 209 /// Get an LValue for the current ThreadID variable. 210 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 211 212 /// Get the name of the capture helper. 213 StringRef getHelperName() const override { return ".omp_outlined."; } 214 215 void emitUntiedSwitch(CodeGenFunction &CGF) override { 216 Action.emitUntiedSwitch(CGF); 217 } 218 219 static bool classof(const CGCapturedStmtInfo *Info) { 220 return CGOpenMPRegionInfo::classof(Info) && 221 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 222 TaskOutlinedRegion; 223 } 224 225 private: 226 /// A variable or parameter storing global thread id for OpenMP 227 /// constructs. 228 const VarDecl *ThreadIDVar; 229 /// Action for emitting code for untied tasks. 230 const UntiedTaskActionTy &Action; 231 }; 232 233 /// API for inlined captured statement code generation in OpenMP 234 /// constructs. 235 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 236 public: 237 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 238 const RegionCodeGenTy &CodeGen, 239 OpenMPDirectiveKind Kind, bool HasCancel) 240 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 241 OldCSI(OldCSI), 242 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 243 244 // Retrieve the value of the context parameter. 
245 llvm::Value *getContextValue() const override { 246 if (OuterRegionInfo) 247 return OuterRegionInfo->getContextValue(); 248 llvm_unreachable("No context value for inlined OpenMP region"); 249 } 250 251 void setContextValue(llvm::Value *V) override { 252 if (OuterRegionInfo) { 253 OuterRegionInfo->setContextValue(V); 254 return; 255 } 256 llvm_unreachable("No context value for inlined OpenMP region"); 257 } 258 259 /// Lookup the captured field decl for a variable. 260 const FieldDecl *lookup(const VarDecl *VD) const override { 261 if (OuterRegionInfo) 262 return OuterRegionInfo->lookup(VD); 263 // If there is no outer outlined region,no need to lookup in a list of 264 // captured variables, we can use the original one. 265 return nullptr; 266 } 267 268 FieldDecl *getThisFieldDecl() const override { 269 if (OuterRegionInfo) 270 return OuterRegionInfo->getThisFieldDecl(); 271 return nullptr; 272 } 273 274 /// Get a variable or parameter for storing global thread id 275 /// inside OpenMP construct. 276 const VarDecl *getThreadIDVariable() const override { 277 if (OuterRegionInfo) 278 return OuterRegionInfo->getThreadIDVariable(); 279 return nullptr; 280 } 281 282 /// Get an LValue for the current ThreadID variable. 283 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 284 if (OuterRegionInfo) 285 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 286 llvm_unreachable("No LValue for inlined OpenMP construct"); 287 } 288 289 /// Get the name of the capture helper. 
290 StringRef getHelperName() const override { 291 if (auto *OuterRegionInfo = getOldCSI()) 292 return OuterRegionInfo->getHelperName(); 293 llvm_unreachable("No helper name for inlined OpenMP construct"); 294 } 295 296 void emitUntiedSwitch(CodeGenFunction &CGF) override { 297 if (OuterRegionInfo) 298 OuterRegionInfo->emitUntiedSwitch(CGF); 299 } 300 301 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 302 303 static bool classof(const CGCapturedStmtInfo *Info) { 304 return CGOpenMPRegionInfo::classof(Info) && 305 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 306 } 307 308 ~CGOpenMPInlinedRegionInfo() override = default; 309 310 private: 311 /// CodeGen info about outer OpenMP region. 312 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 313 CGOpenMPRegionInfo *OuterRegionInfo; 314 }; 315 316 /// API for captured statement code generation in OpenMP target 317 /// constructs. For this captures, implicit parameters are used instead of the 318 /// captured fields. The name of the target region has to be unique in a given 319 /// application so it is provided by the client, because only the client has 320 /// the information to generate that. 321 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 322 public: 323 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 324 const RegionCodeGenTy &CodeGen, StringRef HelperName) 325 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 326 /*HasCancel=*/false), 327 HelperName(HelperName) {} 328 329 /// This is unused for target regions because each starts executing 330 /// with a single thread. 331 const VarDecl *getThreadIDVariable() const override { return nullptr; } 332 333 /// Get the name of the capture helper. 
334 StringRef getHelperName() const override { return HelperName; } 335 336 static bool classof(const CGCapturedStmtInfo *Info) { 337 return CGOpenMPRegionInfo::classof(Info) && 338 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 339 } 340 341 private: 342 StringRef HelperName; 343 }; 344 345 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 346 llvm_unreachable("No codegen for expressions"); 347 } 348 /// API for generation of expressions captured in a innermost OpenMP 349 /// region. 350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 351 public: 352 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 353 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 354 OMPD_unknown, 355 /*HasCancel=*/false), 356 PrivScope(CGF) { 357 // Make sure the globals captured in the provided statement are local by 358 // using the privatization logic. We assume the same variable is not 359 // captured more than once. 360 for (const auto &C : CS.captures()) { 361 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 362 continue; 363 364 const VarDecl *VD = C.getCapturedVar(); 365 if (VD->isLocalVarDeclOrParm()) 366 continue; 367 368 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 369 /*RefersToEnclosingVariableOrCapture=*/false, 370 VD->getType().getNonReferenceType(), VK_LValue, 371 C.getLocation()); 372 PrivScope.addPrivate( 373 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 374 } 375 (void)PrivScope.Privatize(); 376 } 377 378 /// Lookup the captured field decl for a variable. 379 const FieldDecl *lookup(const VarDecl *VD) const override { 380 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 381 return FD; 382 return nullptr; 383 } 384 385 /// Emit the captured statement body. 
386 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 387 llvm_unreachable("No body for expressions"); 388 } 389 390 /// Get a variable or parameter for storing global thread id 391 /// inside OpenMP construct. 392 const VarDecl *getThreadIDVariable() const override { 393 llvm_unreachable("No thread id for expressions"); 394 } 395 396 /// Get the name of the capture helper. 397 StringRef getHelperName() const override { 398 llvm_unreachable("No helper name for expressions"); 399 } 400 401 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 402 403 private: 404 /// Private scope to capture global variables. 405 CodeGenFunction::OMPPrivateScope PrivScope; 406 }; 407 408 /// RAII for emitting code of OpenMP constructs. 409 class InlinedOpenMPRegionRAII { 410 CodeGenFunction &CGF; 411 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 412 FieldDecl *LambdaThisCaptureField = nullptr; 413 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 414 bool NoInheritance = false; 415 416 public: 417 /// Constructs region for combined constructs. 418 /// \param CodeGen Code generation sequence for combined directives. Includes 419 /// a list of functions used for code generation of implicitly inlined 420 /// regions. 421 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 422 OpenMPDirectiveKind Kind, bool HasCancel, 423 bool NoInheritance = true) 424 : CGF(CGF), NoInheritance(NoInheritance) { 425 // Start emission for the construct. 426 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 427 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 428 if (NoInheritance) { 429 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 430 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 431 CGF.LambdaThisCaptureField = nullptr; 432 BlockInfo = CGF.BlockInfo; 433 CGF.BlockInfo = nullptr; 434 } 435 } 436 437 ~InlinedOpenMPRegionRAII() { 438 // Restore original CapturedStmtInfo only if we're done with code emission. 
439 auto *OldCSI = 440 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 441 delete CGF.CapturedStmtInfo; 442 CGF.CapturedStmtInfo = OldCSI; 443 if (NoInheritance) { 444 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 445 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 446 CGF.BlockInfo = BlockInfo; 447 } 448 } 449 }; 450 451 /// Values for bit flags used in the ident_t to describe the fields. 452 /// All enumeric elements are named and described in accordance with the code 453 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 454 enum OpenMPLocationFlags : unsigned { 455 /// Use trampoline for internal microtask. 456 OMP_IDENT_IMD = 0x01, 457 /// Use c-style ident structure. 458 OMP_IDENT_KMPC = 0x02, 459 /// Atomic reduction option for kmpc_reduce. 460 OMP_ATOMIC_REDUCE = 0x10, 461 /// Explicit 'barrier' directive. 462 OMP_IDENT_BARRIER_EXPL = 0x20, 463 /// Implicit barrier in code. 464 OMP_IDENT_BARRIER_IMPL = 0x40, 465 /// Implicit barrier in 'for' directive. 466 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 467 /// Implicit barrier in 'sections' directive. 468 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 469 /// Implicit barrier in 'single' directive. 470 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 471 /// Call of __kmp_for_static_init for static loop. 472 OMP_IDENT_WORK_LOOP = 0x200, 473 /// Call of __kmp_for_static_init for sections. 474 OMP_IDENT_WORK_SECTIONS = 0x400, 475 /// Call of __kmp_for_static_init for distribute. 476 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 477 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 478 }; 479 480 namespace { 481 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 482 /// Values for bit flags for marking which requires clauses have been used. 483 enum OpenMPOffloadingRequiresDirFlags : int64_t { 484 /// flag undefined. 485 OMP_REQ_UNDEFINED = 0x000, 486 /// no requires clause present. 487 OMP_REQ_NONE = 0x001, 488 /// reverse_offload clause. 
489 OMP_REQ_REVERSE_OFFLOAD = 0x002, 490 /// unified_address clause. 491 OMP_REQ_UNIFIED_ADDRESS = 0x004, 492 /// unified_shared_memory clause. 493 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 494 /// dynamic_allocators clause. 495 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 496 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 497 }; 498 499 enum OpenMPOffloadingReservedDeviceIDs { 500 /// Device ID if the device was not defined, runtime should get it 501 /// from environment variables in the spec. 502 OMP_DEVICEID_UNDEF = -1, 503 }; 504 } // anonymous namespace 505 506 /// Describes ident structure that describes a source location. 507 /// All descriptions are taken from 508 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 509 /// Original structure: 510 /// typedef struct ident { 511 /// kmp_int32 reserved_1; /**< might be used in Fortran; 512 /// see above */ 513 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 514 /// KMP_IDENT_KMPC identifies this union 515 /// member */ 516 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 517 /// see above */ 518 ///#if USE_ITT_BUILD 519 /// /* but currently used for storing 520 /// region-specific ITT */ 521 /// /* contextual information. */ 522 ///#endif /* USE_ITT_BUILD */ 523 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 524 /// C++ */ 525 /// char const *psource; /**< String describing the source location. 526 /// The string is composed of semi-colon separated 527 // fields which describe the source file, 528 /// the function and a pair of line numbers that 529 /// delimit the construct. 530 /// */ 531 /// } ident_t; 532 enum IdentFieldIndex { 533 /// might be used in Fortran 534 IdentField_Reserved_1, 535 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
536 IdentField_Flags, 537 /// Not really used in Fortran any more 538 IdentField_Reserved_2, 539 /// Source[4] in Fortran, do not use for C++ 540 IdentField_Reserved_3, 541 /// String describing the source location. The string is composed of 542 /// semi-colon separated fields which describe the source file, the function 543 /// and a pair of line numbers that delimit the construct. 544 IdentField_PSource 545 }; 546 547 /// Schedule types for 'omp for' loops (these enumerators are taken from 548 /// the enum sched_type in kmp.h). 549 enum OpenMPSchedType { 550 /// Lower bound for default (unordered) versions. 551 OMP_sch_lower = 32, 552 OMP_sch_static_chunked = 33, 553 OMP_sch_static = 34, 554 OMP_sch_dynamic_chunked = 35, 555 OMP_sch_guided_chunked = 36, 556 OMP_sch_runtime = 37, 557 OMP_sch_auto = 38, 558 /// static with chunk adjustment (e.g., simd) 559 OMP_sch_static_balanced_chunked = 45, 560 /// Lower bound for 'ordered' versions. 561 OMP_ord_lower = 64, 562 OMP_ord_static_chunked = 65, 563 OMP_ord_static = 66, 564 OMP_ord_dynamic_chunked = 67, 565 OMP_ord_guided_chunked = 68, 566 OMP_ord_runtime = 69, 567 OMP_ord_auto = 70, 568 OMP_sch_default = OMP_sch_static, 569 /// dist_schedule types 570 OMP_dist_sch_static_chunked = 91, 571 OMP_dist_sch_static = 92, 572 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 573 /// Set if the monotonic schedule modifier was present. 574 OMP_sch_modifier_monotonic = (1 << 29), 575 /// Set if the nonmonotonic schedule modifier was present. 576 OMP_sch_modifier_nonmonotonic = (1 << 30), 577 }; 578 579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 580 /// region. 
581 class CleanupTy final : public EHScopeStack::Cleanup { 582 PrePostActionTy *Action; 583 584 public: 585 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 586 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 587 if (!CGF.HaveInsertPoint()) 588 return; 589 Action->Exit(CGF); 590 } 591 }; 592 593 } // anonymous namespace 594 595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 596 CodeGenFunction::RunCleanupsScope Scope(CGF); 597 if (PrePostAction) { 598 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 599 Callback(CodeGen, CGF, *PrePostAction); 600 } else { 601 PrePostActionTy Action; 602 Callback(CodeGen, CGF, Action); 603 } 604 } 605 606 /// Check if the combiner is a call to UDR combiner and if it is so return the 607 /// UDR decl used for reduction. 608 static const OMPDeclareReductionDecl * 609 getReductionInit(const Expr *ReductionOp) { 610 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 611 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 612 if (const auto *DRE = 613 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 614 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 615 return DRD; 616 return nullptr; 617 } 618 619 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 620 const OMPDeclareReductionDecl *DRD, 621 const Expr *InitOp, 622 Address Private, Address Original, 623 QualType Ty) { 624 if (DRD->getInitializer()) { 625 std::pair<llvm::Function *, llvm::Function *> Reduction = 626 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 627 const auto *CE = cast<CallExpr>(InitOp); 628 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 629 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 630 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 631 const auto *LHSDRE = 632 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 633 const auto *RHSDRE = 634 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 635 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 636 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 637 [=]() { return Private; }); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 639 [=]() { return Original; }); 640 (void)PrivateScope.Privatize(); 641 RValue Func = RValue::get(Reduction.second); 642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 643 CGF.EmitIgnoredExpr(InitOp); 644 } else { 645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 647 auto *GV = new llvm::GlobalVariable( 648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 649 llvm::GlobalValue::PrivateLinkage, Init, Name); 650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 651 RValue InitRVal; 652 switch (CGF.getEvaluationKind(Ty)) { 653 case TEK_Scalar: 654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 655 break; 656 case TEK_Complex: 657 InitRVal = 658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 659 break; 660 case TEK_Aggregate: { 661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 664 /*IsInitializer=*/false); 665 return; 666 } 667 } 668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 671 /*IsInitializer=*/false); 672 } 673 } 674 675 /// Emit initialization of arrays of complex types. 676 /// \param DestAddr Address of the array. 677 /// \param Type Type of array. 678 /// \param Init Initial expression of array. 679 /// \param SrcAddr Address of the original array. 
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 681 QualType Type, bool EmitDeclareReductionInit, 682 const Expr *Init, 683 const OMPDeclareReductionDecl *DRD, 684 Address SrcAddr = Address::invalid()) { 685 // Perform element-by-element initialization. 686 QualType ElementTy; 687 688 // Drill down to the base element type on both arrays. 689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = 701 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 702 // The basic structure here is a while-do loop. 703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 705 llvm::Value *IsEmpty = 706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 708 709 // Enter the loop body, making that address the current address. 
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 711 CGF.EmitBlock(BodyBB); 712 713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 714 715 llvm::PHINode *SrcElementPHI = nullptr; 716 Address SrcElementCurrent = Address::invalid(); 717 if (DRD) { 718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 719 "omp.arraycpy.srcElementPast"); 720 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 721 SrcElementCurrent = 722 Address(SrcElementPHI, 723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 724 } 725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 727 DestElementPHI->addIncoming(DestBegin, EntryBB); 728 Address DestElementCurrent = 729 Address(DestElementPHI, 730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 731 732 // Emit copy. 733 { 734 CodeGenFunction::RunCleanupsScope InitScope(CGF); 735 if (EmitDeclareReductionInit) { 736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 737 SrcElementCurrent, ElementTy); 738 } else 739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 740 /*IsInitializer=*/false); 741 } 742 743 if (DRD) { 744 // Shift the address forward by one element. 745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 746 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 747 "omp.arraycpy.dest.element"); 748 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 749 } 750 751 // Shift the address forward by one element. 752 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 753 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 754 "omp.arraycpy.dest.element"); 755 // Check whether we've reached the end. 
  // Tail of the element-by-element array-copy loop emitted above: compare the
  // advanced destination pointer against the end and either loop or exit.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Feed the advanced destination pointer back into the loop PHI.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared (original) copy of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit an lvalue for the upper bound of an array-section reduction item.
/// Anything that is not an array section has no upper bound, so an invalid
/// LValue is returned for it.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit initialization of the private copy of array reduction item \p N,
/// using either the 'declare reduction' initializer or the private
/// variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Prefer the user-defined reduction initializer when one exists, or when
  // the private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

/// Zip the parallel clause arrays (shared, original, private and
/// reduction-op expressions) into per-item ClausesData records; the arrays
/// are iterated in lockstep and are expected to have equal length.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit the shared and original lvalues for reduction item \p N and append
/// them to SharedAddresses/OrigAddresses. Must be called in order: N equals
/// the number of items emitted so far (asserted below).
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // The reduction reference is the shared expression itself - reuse the
    // lvalues just emitted.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

/// Compute the size of reduction item \p N (in chars and, for array
/// sections/VLAs, in elements) and record it in Sizes. For variably-modified
/// private types this also binds the VLA size expression so the private type
/// can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: char size is known from the type, no element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is (UB - LB) + 1; scale by the element size for the
    // size in chars.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified item: char size comes from the type, derive
    // the element count by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA's size expression, then emit
  // the variably-modified private type under that mapping.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Variant of emitAggregateType that takes a precomputed element count
/// \p Size. No-op (with Size asserted null) for non-variably-modified types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N at
/// \p PrivateAddr, choosing between aggregate init, a user-defined reduction
/// initializer, and the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-by-element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item \p N needs a
/// destructor call.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of reduction item \p N if
/// its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference \p BaseLV through pointer/reference levels of \p BaseTy until
/// the type matches \p ElTy, then return an lvalue for the result cast to
/// ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel off pointer/reference levels until the pointee matches ElTy.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the chain of temporaries needed so that \p Addr (pointing to an
/// ElTy) can be reached through a base of type \p BaseTy: one memory
/// temporary per pointer/reference level, each storing the address of the
/// next; returns the outermost temporary (or \p Addr itself if no levels).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Walk to the base declaration of an array section/subscript expression
/// \p Ref. On success \p DE is set to the base DeclRefExpr and the VarDecl is
/// returned; for other expression kinds returns null (and DE is untouched).
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    // Strip any nested sections and subscripts to reach the DeclRefExpr.
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust the private address of reduction item \p N so it carries the same
/// offset from its base variable as the shared item does (needed when the
/// item is an array section/subscript); records the base declaration in
/// BaseDecls either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset of the shared item from its base; apply the same offset to the
    // private copy.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item \p N is initialized by a user-defined
/// ('declare reduction') initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD &&
         DRD->getInitializer();
}

/// Load the thread id through the region's pointer-typed thread-id
/// parameter (see getThreadIDVariable()).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the captured statement body inside a terminate scope so that
/// exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions hold the thread id in a plain local variable rather than a
/// pointer parameter, so no load through a pointer is needed here.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-mutable, non-bitfield field of type \p FieldTy to
/// record \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Type of the kmp_critical_name lock array: [8 x i32].
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Drop per-module state and erase internal global declarations that ended
/// up unreferenced (possibly created only for debug info).
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions, and declarations that still have uses.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime entity name: FirstSeparator goes before the
/// first part, Separator between all subsequent parts.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner (or initializer) function for a user-defined
/// reduction; \p In and \p Out are the omp_in/omp_out (or omp_orig/omp_priv)
/// variables mapped onto the two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
"omp_combiner" : "omp_initializer", ""}); 1128 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1129 Name, &CGM.getModule()); 1130 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1131 if (CGM.getLangOpts().Optimize) { 1132 Fn->removeFnAttr(llvm::Attribute::NoInline); 1133 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1134 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1135 } 1136 CodeGenFunction CGF(CGM); 1137 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1138 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1139 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1140 Out->getLocation()); 1141 CodeGenFunction::OMPPrivateScope Scope(CGF); 1142 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1143 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1144 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1145 .getAddress(CGF); 1146 }); 1147 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1148 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1149 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1150 .getAddress(CGF); 1151 }); 1152 (void)Scope.Privatize(); 1153 if (!IsCombiner && Out->hasInit() && 1154 !CGF.isTrivialInitializer(Out->getInit())) { 1155 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1156 Out->getType().getQualifiers(), 1157 /*IsInitializer=*/true); 1158 } 1159 if (CombinerInitializer) 1160 CGF.EmitIgnoredExpr(CombinerInitializer); 1161 Scope.ForceCleanup(); 1162 CGF.FinishFunction(); 1163 return Fn; 1164 } 1165 1166 void CGOpenMPRuntime::emitUserDefinedReduction( 1167 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1168 if (UDRMap.count(D) > 0) 1169 return; 1170 llvm::Function *Combiner = emitCombinerOrInitializer( 1171 CGM, D->getType(), D->getCombiner(), 1172 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1173 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1174 /*IsCombiner=*/true); 1175 llvm::Function *Initializer = nullptr; 1176 if (const Expr *Init = D->getInitializer()) { 1177 Initializer = emitCombinerOrInitializer( 1178 CGM, D->getType(), 1179 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1180 : nullptr, 1181 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1182 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1183 /*IsCombiner=*/false); 1184 } 1185 UDRMap.try_emplace(D, Combiner, Initializer); 1186 if (CGF) { 1187 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1188 Decls.second.push_back(D); 1189 } 1190 } 1191 1192 std::pair<llvm::Function *, llvm::Function *> 1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1194 auto I = UDRMap.find(D); 1195 if (I != UDRMap.end()) 1196 return I->second; 1197 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1198 return UDRMap.lookup(D); 1199 } 1200 1201 namespace { 1202 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1203 // Builder if one is present. 1204 struct PushAndPopStackRAII { 1205 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1206 bool HasCancel, llvm::omp::Directive Kind) 1207 : OMPBuilder(OMPBuilder) { 1208 if (!OMPBuilder) 1209 return; 1210 1211 // The following callback is the crucial part of clangs cleanup process. 1212 // 1213 // NOTE: 1214 // Once the OpenMPIRBuilder is used to create parallel regions (and 1215 // similar), the cancellation destination (Dest below) is determined via 1216 // IP. That means if we have variables to finalize we split the block at IP, 1217 // use the new block (=BB) as destination to build a JumpDest (via 1218 // getJumpDestInCurrentScope(BB)) which then is fed to 1219 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1220 // to push & pop an FinalizationInfo object. 
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the captured statement of a 'parallel'/'teams'-style directive
/// \p D into a function whose first argument is the kmp_int32* thread id.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the region may execute 'cancel' - each directive kind
  // stores that flag on its own class.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Outline the 'parallel' region of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Outline the 'teams' region of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
        RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the remainder of the task via
  // __kmpc_omp_task after each part finishes.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task region may execute 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report how many were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Fill \p Fields with \p Data, one constant per field of \p RD, inserting
/// null values for LLVM struct slots that carry only padding/alignment.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global of record type \p Ty initialized from \p Data; extra
/// arguments are forwarded to ConstantStructBuilder::finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Like createGlobalStruct, but appends the struct constant to an enclosing
/// aggregate builder \p Parent instead of creating a new global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Create the "service" insert point used when caching thread-id related
/// values: a dummy bitcast instruction placed either at the current insert
/// point or right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the dummy service insert point for \p CGF, if one was created.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Build the ";file;function;line;column;;" ident string for \p Loc into
/// \p Buffer and return a view of it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Return an ident_t* describing \p Loc with the given \p Flags; when debug
/// info is disabled or the location is invalid, the default source-location
/// string is used instead.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

/// Return the global thread id for the current function: reuse the outlined
/// region's thread-id argument when safe, otherwise emit (and cache) a call
/// to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the argument when exceptions cannot interfere, or the
      // load happens in the entry block, or in the same block as the
      // pointer's defining instruction; otherwise fall through and call the
      // runtime instead.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  // Emit the runtime call at the service insert point so it dominates all
  // later uses within the function.
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function cached state (thread id, user-defined reductions
/// and mappers, lastprivate-conditional types, untied-task stack) when the
/// function in \p CGF is finished.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

/// Return the ident_t* type used by the runtime calls.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Return the (lazily built) pointer-to-function type of an outlined
/// parallel region entry.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Return the __kmpc_for_static_init_* (or, for GPU distribute,
/// __kmpc_distribute_static_init_*) runtime entry matching the induction
/// variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_init_* entry (dynamic scheduling) matching the
/// induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), // loc
                              CGM.Int32Ty,           // tid
                              CGM.Int32Ty,           // schedtype
                              ITy,                   // lower
                              ITy,                   // upper
                              ITy,                   // stride
                              ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_fini_* entry matching the induction variable's
/// size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_next_* entry matching the induction variable's
/// size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  // Note: unlike init/fini, dispatch_next returns an i32 status value.
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The presumed filename may have been remapped (e.g. by a #line
    // directive) to a file that does not exist on disk; retry with line
    // directives ignored before reporting a hard error.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the runtime-managed reference pointer for a
/// 'declare target link' variable (or a 'to' variable under unified shared
/// memory), creating the pointer global lazily. Returns an invalid Address
/// for variables that do not need the indirection.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Simd-only mode emits no offloading machinery.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr". The file
    // ID suffix disambiguates internal-linkage variables across TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is statically initialized to the variable's
      // address; on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the per-variable cache global (an i8**) that is passed to
/// __kmpc_threadprivate_cached for variable \p VD.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Only meaningful when threadprivate is not lowered to native TLS.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the current thread's copy of threadprivate
/// variable \p VD, calling __kmpc_threadprivate_cached when native TLS is
/// not used.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the original address is already per-thread.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Register the given ctor/copy-ctor/dtor triple for a threadprivate
/// variable with the OpenMP runtime.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit (at most once per variable) the helper functions that construct and
/// destroy the threadprivate copies of \p VD and register them with the
/// runtime. Returns the module-level init function when one had to be
/// synthesized (i.e. \p CGF is null), nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles construction/destruction itself.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only emit once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Its signature is
      // void *ctor(void *dst): it initializes *dst and returns dst.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Its signature is void dtor(void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed as typed null function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a dedicated global init function
      // that performs the runtime registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit offload entries (and, on the device, the actual functions) for the
/// ctor/dtor of a 'declare target' global variable. Returns true iff the
/// caller must suppress the regular host-side emission (device compilation).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when there is no device to offload to.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Link-clause variables (and 'to' under unified shared memory) are handled
  // through the reference-pointer indirection, not ctor/dtor entries.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial location for the generated body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host the entry only needs a unique address to pair with the
      // device-side ctor; emit a private dummy constant for that purpose.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy private constant serving as the entry's unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name, backed either by native TLS or by
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Native TLS: mark the global thread-local and use it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else over \p Cond, generating the arms with \p ThenGen and
/// \p ElseGen. Constant-foldable conditions emit only the live arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit a call to the outlined parallel region: __kmpc_fork_call when the
/// region runs in parallel, or a serialized call sequence when \p IfCond
/// evaluates to false.
/// NOTE(review): \p NumThreads is unused in this base implementation;
/// presumably consumed by runtime-specific overrides — confirm before
/// relying on it here.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region, reuse the gtid parameter if available.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: query the runtime and spill the result to a temporary so an
  // address can be returned.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the module-internal global named \p Name of type \p Ty, creating
/// it (common linkage, zero-initialized) on first use and caching it in
/// InternalVars.
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cached entry: all users must agree on the type.
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the lock global used by the runtime for the named critical region.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs: emits the "enter"
/// runtime call before the region and the "exit" call after it. When
/// \p Conditional is set, the region body is guarded on the enter call's
/// nonzero result and the caller must invoke Done() to close the guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional guard opened in Enter(); only meaningful (and only
  // called by users) when Conditional is true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is an extra trailing argument only on the enter call.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // A missing filter clause defaults to thread 0 (the primary thread).
  llvm::Value *FilterVal = Filter
                               ?
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2305 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2307 FilterVal}; 2308 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2309 getThreadID(CGF, Loc)}; 2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2311 CGM.getModule(), OMPRTL___kmpc_masked), 2312 Args, 2313 OMPBuilder.getOrCreateRuntimeFunction( 2314 CGM.getModule(), OMPRTL___kmpc_end_masked), 2315 ArgsEnd, 2316 /*Conditional=*/true); 2317 MaskedOpGen.setAction(Action); 2318 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2319 Action.Done(CGF); 2320 } 2321 2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2323 SourceLocation Loc) { 2324 if (!CGF.HaveInsertPoint()) 2325 return; 2326 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2327 OMPBuilder.createTaskyield(CGF.Builder); 2328 } else { 2329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2330 llvm::Value *Args[] = { 2331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2333 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2334 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2335 Args); 2336 } 2337 2338 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2339 Region->emitUntiedSwitch(CGF); 2340 } 2341 2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2343 const RegionCodeGenTy &TaskgroupOpGen, 2344 SourceLocation Loc) { 2345 if (!CGF.HaveInsertPoint()) 2346 return; 2347 // __kmpc_taskgroup(ident_t *, gtid); 2348 // TaskgroupOpGen(); 2349 // __kmpc_end_taskgroup(ident_t *, gtid); 2350 // Prepare arguments and build a call to __kmpc_taskgroup 2351 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2352 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2353 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2354 Args, 2355 
OMPBuilder.getOrCreateRuntimeFunction( 2356 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2357 Args); 2358 TaskgroupOpGen.setAction(Action); 2359 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2360 } 2361 2362 /// Given an array of pointers to variables, project the address of a 2363 /// given variable. 2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2365 unsigned Index, const VarDecl *Var) { 2366 // Pull out the pointer to the variable. 2367 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2368 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2369 2370 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2371 Addr = CGF.Builder.CreateElementBitCast( 2372 Addr, CGF.ConvertTypeForMem(Var->getType())); 2373 return Addr; 2374 } 2375 2376 static llvm::Value *emitCopyprivateCopyFunction( 2377 CodeGenModule &CGM, llvm::Type *ArgsType, 2378 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2379 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2380 SourceLocation Loc) { 2381 ASTContext &C = CGM.getContext(); 2382 // void copy_func(void *LHSArg, void *RHSArg); 2383 FunctionArgList Args; 2384 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2385 ImplicitParamDecl::Other); 2386 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2387 ImplicitParamDecl::Other); 2388 Args.push_back(&LHSArg); 2389 Args.push_back(&RHSArg); 2390 const auto &CGFI = 2391 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2392 std::string Name = 2393 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2394 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2395 llvm::GlobalValue::InternalLinkage, Name, 2396 &CGM.getModule()); 2397 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2398 Fn->setDoesNotRecurse(); 2399 CodeGenFunction CGF(CGM); 2400 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region. The body is guarded by __kmpc_single /
/// __kmpc_end_single; if copyprivate clauses are present, the executing
/// thread's values are broadcast to the team via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; -- only reached on the thread that executed the region,
    // because the action above is conditional.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs here bind to the callee's
    // DestExprs/SrcExprs parameters, i.e. the order looks swapped at the call
    // site — presumably intentional; verify against the callee's use of
    // EmitOMPCopy before changing.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region. When \p IsThreads is true the body is bracketed
/// by __kmpc_ordered/__kmpc_end_ordered; otherwise it is emitted inline with
/// no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag used when emitting the
/// implicit or explicit barrier for that construct.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Pick the default schedule and chunk for a loop directive. Only overrides
/// the outputs when the loop is a doacross loop (ordered(n) with n > 0).
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit an explicit or implicit barrier. Inside a cancellable region (and
/// unless \p ForceSimpleCall) this uses __kmpc_cancel_barrier and, when
/// \p EmitChecks, branches to the cancellation exit if it returns non-zero.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// True iff \p ScheduleKind lowers to the static non-chunked runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// True iff \p ScheduleKind lowers to the static non-chunked distribute
/// schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// True iff \p ScheduleKind lowers to the static chunked runtime schedule.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// True iff \p ScheduleKind lowers to the static chunked distribute schedule.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// True iff \p ScheduleKind lowers to anything other than the static
/// schedule, i.e. requires the dynamic dispatch runtime entry points.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the schedule modifiers (monotonic/nonmonotonic/simd) into the runtime
/// schedule value. The simd modifier may rewrite \p Schedule itself; the
/// result is Schedule OR'ed with the modifier bits.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case
OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init_* call that initializes a dynamically
/// scheduled (or ordered) worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules must go through __kmpc_for_static_init
  // instead of the dispatch path.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper emitting the __kmpc_for_static_init-style call for both
/// worksharing-loop/sections and distribute constructs.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing loop or sections construct.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ?
OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' construct.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  // AMDGCN/NVPTX device compilations use a dedicated GPU distribute entry
  // point (see the matching check in emitForStaticFinish).
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  // 'distribute' carries no schedule modifiers.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the runtime call closing a statically scheduled loop, sections, or
/// distribute region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  // Mirror the GPU-specific entry point chosen in emitDistributeStaticInit.
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

/// Notify the runtime that one ordered iteration of a dynamically scheduled
/// loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Fetch the next chunk of a dynamically scheduled loop. Returns an i1 that
/// is true while the runtime still hands out work.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns kmp_int32; convert it to the boolean "more work" flag.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the runtime call implementing the 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value
*NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit the runtime call implementing the 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a 'flush' construct. Note the variable list and ordering \p AO are
/// currently unused by this lowering.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// True when no target-region or device-global-variable entries have been
/// recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Placeholder entry: address and ID are filled in when the region is
  // actually emitted and registered.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register (host) or complete (device) a target region entry with its
/// emitted address and ID constants.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Query whether a target region entry exists for the given location. Unless
/// \p IgnoreAddressId, an entry that is already registered (has an address or
/// ID) is reported as absent.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  // Walk the nested maps level by level; absence at any level means no entry.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register (host) or complete (device) a declare-target global variable
/// entry with its address, size, flags, and linkage.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already carries an address: only fill in a missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emit one omp_offloading entry global for \p Addr into the section the
/// offload linker/plugin scans for entries.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of the entry struct: {addr, name, size, flags, reserved}.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
3173 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3174 return; 3175 3176 llvm::Module &M = CGM.getModule(); 3177 llvm::LLVMContext &C = M.getContext(); 3178 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3179 SourceLocation, StringRef>, 3180 16> 3181 OrderedEntries(OffloadEntriesInfoManager.size()); 3182 llvm::SmallVector<StringRef, 16> ParentFunctions( 3183 OffloadEntriesInfoManager.size()); 3184 3185 // Auxiliary methods to create metadata values and strings. 3186 auto &&GetMDInt = [this](unsigned V) { 3187 return llvm::ConstantAsMetadata::get( 3188 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3189 }; 3190 3191 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3192 3193 // Create the offloading info metadata node. 3194 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3195 3196 // Create function that emits metadata for each target region entry; 3197 auto &&TargetRegionMetadataEmitter = 3198 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3199 &GetMDString]( 3200 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3201 unsigned Line, 3202 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3203 // Generate metadata for target regions. Each entry of this metadata 3204 // contains: 3205 // - Entry 0 -> Kind of this type of metadata (0). 3206 // - Entry 1 -> Device ID of the file where the entry was identified. 3207 // - Entry 2 -> File ID of the file where the entry was identified. 3208 // - Entry 3 -> Mangled name of the function where the entry was 3209 // identified. 3210 // - Entry 4 -> Line in the file where the entry was identified. 3211 // - Entry 5 -> Order the entry was created. 3212 // The first element of the metadata node is the kind. 
3213 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3214 GetMDInt(FileID), GetMDString(ParentName), 3215 GetMDInt(Line), GetMDInt(E.getOrder())}; 3216 3217 SourceLocation Loc; 3218 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3219 E = CGM.getContext().getSourceManager().fileinfo_end(); 3220 I != E; ++I) { 3221 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3222 I->getFirst()->getUniqueID().getFile() == FileID) { 3223 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3224 I->getFirst(), Line, 1); 3225 break; 3226 } 3227 } 3228 // Save this entry in the right position of the ordered entries array. 3229 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3230 ParentFunctions[E.getOrder()] = ParentName; 3231 3232 // Add metadata to the named metadata node. 3233 MD->addOperand(llvm::MDNode::get(C, Ops)); 3234 }; 3235 3236 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3237 TargetRegionMetadataEmitter); 3238 3239 // Create function that emits metadata for each device global variable entry; 3240 auto &&DeviceGlobalVarMetadataEmitter = 3241 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3242 MD](StringRef MangledName, 3243 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3244 &E) { 3245 // Generate metadata for global variables. Each entry of this metadata 3246 // contains: 3247 // - Entry 0 -> Kind of this type of metadata (1). 3248 // - Entry 1 -> Mangled name of the variable. 3249 // - Entry 2 -> Declare target kind. 3250 // - Entry 3 -> Order the entry was created. 3251 // The first element of the metadata node is the kind. 3252 llvm::Metadata *Ops[] = { 3253 GetMDInt(E.getKind()), GetMDString(MangledName), 3254 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3255 3256 // Save this entry in the right position of the ordered entries array. 
3257 OrderedEntries[E.getOrder()] = 3258 std::make_tuple(&E, SourceLocation(), MangledName); 3259 3260 // Add metadata to the named metadata node. 3261 MD->addOperand(llvm::MDNode::get(C, Ops)); 3262 }; 3263 3264 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3265 DeviceGlobalVarMetadataEmitter); 3266 3267 for (const auto &E : OrderedEntries) { 3268 assert(std::get<0>(E) && "All ordered entries must exist!"); 3269 if (const auto *CE = 3270 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3271 std::get<0>(E))) { 3272 if (!CE->getID() || !CE->getAddress()) { 3273 // Do not blame the entry if the parent funtion is not emitted. 3274 StringRef FnName = ParentFunctions[CE->getOrder()]; 3275 if (!CGM.GetGlobalValue(FnName)) 3276 continue; 3277 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3278 DiagnosticsEngine::Error, 3279 "Offloading entry for target region in %0 is incorrect: either the " 3280 "address or the ID is invalid."); 3281 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3282 continue; 3283 } 3284 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3285 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3286 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3287 OffloadEntryInfoDeviceGlobalVar>( 3288 std::get<0>(E))) { 3289 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3290 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3291 CE->getFlags()); 3292 switch (Flags) { 3293 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3294 if (CGM.getLangOpts().OpenMPIsDevice && 3295 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3296 continue; 3297 if (!CE->getAddress()) { 3298 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3299 DiagnosticsEngine::Error, "Offloading entry for declare target " 3300 "variable %0 is incorrect: the " 3301 "address is invalid."); 3302 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3303 
continue; 3304 } 3305 // The vaiable has no definition - no need to add the entry. 3306 if (CE->getVarSize().isZero()) 3307 continue; 3308 break; 3309 } 3310 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3311 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3312 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3313 "Declaret target link address is set."); 3314 if (CGM.getLangOpts().OpenMPIsDevice) 3315 continue; 3316 if (!CE->getAddress()) { 3317 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3318 DiagnosticsEngine::Error, 3319 "Offloading entry for declare target variable is incorrect: the " 3320 "address is invalid."); 3321 CGM.getDiags().Report(DiagID); 3322 continue; 3323 } 3324 break; 3325 } 3326 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3327 CE->getVarSize().getQuantity(), Flags, 3328 CE->getLinkage()); 3329 } else { 3330 llvm_unreachable("Unsupported entry kind."); 3331 } 3332 } 3333 } 3334 3335 /// Loads all the offload entries information from the host IR 3336 /// metadata. 3337 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3338 // If we are in target mode, load the metadata from the host IR. This code has 3339 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each metadata operand encodes one offload entry; operand 0 is the entry
  // kind, the remaining operands follow the layout produced by
  // createOffloadEntriesAndInfoMetadata().
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds (once) the LLVM type for the task entry-point typedef:
/// typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry {
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Record is packed, presumably so the layout matches the offload runtime's
    // view of __tgt_offload_entry exactly — see the libomptarget ABI.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bookkeeping for one privatized variable in a task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor used for untied local variables: only Original is set.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // True iff this entry was built with the VarDecl-only constructor above,
  // i.e. it describes a local (non-clause) private variable.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an omp allocate attribute that names a
/// non-default allocator (such declarations need indirect storage).
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the implicit record `.kmp_privates.t` holding one field per
/// privatized variable, or returns nullptr when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes to the field so the privates record
        // keeps the variable's required alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the implicit record for kmp_task_t (and the kmp_cmplrdata_t union
/// it embeds); taskloop directives get five extra trailing fields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Builds kmp_task_t_with_privates: the kmp_task_t header followed by the
/// optional privates record (field 1, present only when there are privates).
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The runtime calls the proxy with (gtid, kmp_task_t_with_privates *).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1 of kmp_task_t_with_privates is the privates record; it is absent
  // when the task has no privatized variables, in which case pass null.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded
    // from the kmp_task_t header.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits the task destructor thunk: walks the fields of the privates record
/// and pushes a destroy cleanup for every field with a non-trivial
/// destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of kmp_task_t_with_privates is the privates record; push a
  // destroy cleanup for every field that needs destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  // Counter starts at 1 because argument 0 is the privates-record pointer;
  // PrivateVarsPos maps each privatized VarDecl to its argument index.
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // The mapping function is trivial; force it to be inlined when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // For task_dup (ForDup) only non-trivial constructor initializers need
    // re-emission; trivial copies were already handled by the memcpy of the
    // task structure.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Checks whether any private copy has a non-trivial constructor initializer,
/// i.e. whether a task duplication function must emit initialization (used
/// for taskloops).
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates have no user-visible original variable and never need
    // copy-construction in the dup function.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial C++ constructor call forces the duplication
    // function; trivial initializers are handled by plain memcpy.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the three implicit parameters: destination task, source task and
  // the lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base lvalue for the *destination* task record.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // NOTE: this inner TDBase intentionally shadows the outer one — here we
    // read the shareds pointer from the *source* task.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Reuse the common private-initialization emission in "dup" mode.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
/// RAII scope that, on construction, privatizes the iterator variables of an
/// OMPIteratorExpr and opens one loop per iterator (emitting the counter
/// zero-initialization, the "cont"/"body" blocks and the bounds check); on
/// destruction it emits the counter increments, back-branches and the "exit"
/// blocks in reverse order, closing the loop nest. A null expression makes
/// the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation and exit destinations, filled in construction
  // order and unwound in reverse by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Upper bounds are evaluated up front, before privatization.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the address of the data referenced by expression \p E together
/// with its size in bytes:
/// - for an OMPArrayShapingExpr: the base pointer and the product of the
///   element size and all dimension extents;
/// - for an OMPArraySectionExpr: the lower-bound address and the byte
///   distance to one-past the upper-bound element;
/// - otherwise: the lvalue address and sizeof the expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = (&section_end + 1) - &section_begin, in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the implicit kmp_task_affinity_info_t record
/// {intptr_t base_addr; size_t len; uint32 flags}, if it is not built yet.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Collect private, firstprivate and lastprivate variables together with
  // their private copies (and, for firstprivates, the element-init decl).
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment (stable to keep declaration order within an
  // alignment class) so the privates record packs without padding surprises.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives use a distinct (cached) kmp_task_t layout.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime condition (pointer set) or a compile-time
  // constant (int flag of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static case: a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-generated entries are appended at runtime; keep the running
      // index in memory, starting right after the statically-filled entries.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup routine when there are
    // lastprivates or nontrivially-constructed privates.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // source/sink/depobj/unknown never reach the translation — they are
  // handled (or rejected) before dependence records are emitted.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns the number of dependence records stored in the depobj referenced
/// by \p DepobjLVal together with an lvalue for the first record. The count
/// is kept in the base_addr field of the record stored immediately *before*
/// the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element count lives one record before the array proper.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info records for the dependence expressions of \p Data
/// into \p DependenciesArray, starting at position \p Pos (either a
/// compile-time counter or a runtime lvalue index; the chosen position is
/// advanced per emitted record). Iterator modifiers are expanded via
/// OMPIteratorGeneratorScope.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance whichever position representation is in use.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Computes, for each depobj expression in \p Data, the number of dependence
/// records it holds (loaded via the count record at index -1) and returns
/// those counts as runtime values.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
"Expected depobj dependecy kind."); 4735 SmallVector<llvm::Value *, 4> Sizes; 4736 SmallVector<LValue, 4> SizeLVals; 4737 ASTContext &C = CGF.getContext(); 4738 QualType FlagsTy; 4739 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4740 RecordDecl *KmpDependInfoRD = 4741 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4742 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4743 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4744 { 4745 OMPIteratorGeneratorScope IteratorScope( 4746 CGF, cast_or_null<OMPIteratorExpr>( 4747 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4748 : nullptr)); 4749 for (const Expr *E : Data.DepExprs) { 4750 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4751 LValue Base = CGF.EmitLoadOfPointerLValue( 4752 DepobjLVal.getAddress(CGF), 4753 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4754 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4755 Base.getAddress(CGF), KmpDependInfoPtrT); 4756 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4757 Base.getTBAAInfo()); 4758 Address DepObjAddr = CGF.Builder.CreateGEP( 4759 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4760 LValue NumDepsBase = CGF.MakeAddrLValue( 4761 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4762 // NumDeps = deps[i].base_addr; 4763 LValue BaseAddrLVal = CGF.EmitLValueForField( 4764 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4765 llvm::Value *NumDeps = 4766 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4767 LValue NumLVal = CGF.MakeAddrLValue( 4768 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4769 C.getUIntPtrType()); 4770 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4771 NumLVal.getAddress(CGF)); 4772 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4773 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4774 
CGF.EmitStoreOfScalar(Add, NumLVal); 4775 SizeLVals.push_back(NumLVal); 4776 } 4777 } 4778 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4779 llvm::Value *Size = 4780 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4781 Sizes.push_back(Size); 4782 } 4783 return Sizes; 4784 } 4785 4786 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4787 LValue PosLVal, 4788 const OMPTaskDataTy::DependData &Data, 4789 Address DependenciesArray) { 4790 assert(Data.DepKind == OMPC_DEPEND_depobj && 4791 "Expected depobj dependecy kind."); 4792 ASTContext &C = CGF.getContext(); 4793 QualType FlagsTy; 4794 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4795 RecordDecl *KmpDependInfoRD = 4796 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4797 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4798 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4799 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4800 { 4801 OMPIteratorGeneratorScope IteratorScope( 4802 CGF, cast_or_null<OMPIteratorExpr>( 4803 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4804 : nullptr)); 4805 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4806 const Expr *E = Data.DepExprs[I]; 4807 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4808 LValue Base = CGF.EmitLoadOfPointerLValue( 4809 DepobjLVal.getAddress(CGF), 4810 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4811 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4812 Base.getAddress(CGF), KmpDependInfoPtrT); 4813 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4814 Base.getTBAAInfo()); 4815 4816 // Get number of elements in a single depobj. 
      // The count is stored in the base_addr field of the record at index -1.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

/// Emits the dependency array for the depend clauses of a task-like
/// directive. Returns the total number of depend records (i32) and the
/// array's address cast to void*; returns {nullptr, invalid} when all
/// clauses have empty expression lists.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only regular (non-depobj, non-iterator) dependencies
  // have a statically known record count.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators; both are runtime values.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Records per clause = product of iterator trip counts * #expressions.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Bind the runtime element count to an opaque expression so it can serve
    // as the VLA size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Everything is known statically: a plain constant-sized array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First fill the regular dependencies (statically indexed).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // From here on the position must live in memory, since iterator loops and
  // depobj copies advance it at runtime; seed it with the static count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the dependency array for a 'depobj' construct: allocates it with
/// __kmpc_alloc (see continuation below), stores the number of dependencies
/// in the extra record at index 0, fills the rest via emitDependData, and
/// returns the address of the first real record (index 1) as void*.
/// Returns an invalid Address when the clause has no dependency expressions.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator, the record count is the product of the iterator trip
    // counts, computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the size record at index 0.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the (count + 1)-element array at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // The array must outlive the current stack frame, so it is heap-allocated
  // through the OpenMP runtime.
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Filling starts at index 1; with an iterator the index must be a runtime
  // value, otherwise a plain unsigned counter is enough.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the size record, at the first real dependency.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emits code for 'destroy' on a depobj: steps back one record from the
/// stored pointer (to the real allocation start, which includes the size
/// record) and releases it with __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // The allocation actually begins one record earlier (the size record).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emits code for 'update' on a depobj: loops over every record in the
/// depobj's array and rewrites its flags field with the new dependency kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Element pointer is carried by a PHI: entry edge starts at Begin, the
  // back edge brings the advanced pointer.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits code for the 'task' directive: initializes the task object via
/// emitTaskInit, emits the dependency array (if any), then either enqueues
/// the task through __kmpc_omp_task[_with_deps], or — when the if-clause
/// evaluates to false — waits on the dependencies and runs the task entry
/// inline between __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0
/// (see the continuation of this function below).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Argument order mirrors the __kmpc_omp_task_with_deps signature above;
  // no noalias dependencies are emitted (slots 5/6 are 0/null).
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" path: the if-clause is true (or absent) — enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "else" path: the if-clause is false — execute the task immediately in
  // the current thread (undeferred), after waiting on its dependencies.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound, upper-bound and stride fields of the task
  // object from the directive's loop-bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Arguments follow the __kmpc_taskloop signature documented above.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Both element pointers are carried by PHIs: entry edge starts at the
  // array base, the back edge brings the advanced pointers.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Within the loop body, LHSVar/RHSVar temporarily refer to the current
  // elements so RedOpGen combines a single element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // UDR case: ReductionOp is a call whose callee is an OpaqueValueExpr over a
  // DeclRefExpr to an OMPDeclareReductionDecl. Map the opaque callee to the
  // declared combiner function before emitting the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Plain expression combiner.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined reduction function passed to __kmpc_reduce:
///   void reduction_func(void *LHSArg, void *RHSArg)
/// where both arguments are void*[n] arrays of pointers to the per-variable
/// reduction data; each slot is combined with the matching ReductionOps entry
/// (see the continuation of this function below).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Redirect each LHS/RHS reduction variable to its slot in the arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA privates occupy an extra slot holding the element count; bind it
      // to the VLA's size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits one reduction combination: the element-wise aggregate path for
/// array-typed privates, the plain combiner otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits code for the OpenMP 'reduction' clause: builds the reduction list,
/// the reduce_func, the __kmpc_reduce{_nowait} call and the switch over its
/// result (case 1: non-atomic combine, case 2: atomic combine).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] =
  //  ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit the combiners directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the extra slot, smuggled through a void*.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Re-evaluate the update expression against a temporary copy
                // of the old value of X.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(VD, [&CGF, VD, XRValue, Loc]() {
                  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                  CGF.emitOMPSimpleStore(
                      CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                      VD->getType().getNonReferenceType(), Loc);
                  return LHSTemp;
                });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
      CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits the runtime initialization for task reductions: builds the
/// kmp_taskred_input_t descriptor array and calls
/// __kmpc_taskred_modifier_init or __kmpc_taskred_init.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws). (The original comment referenced the *_init entry point
  // with five arguments; the call emitted below is the modifier *fini* with
  // three.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant, i.e.
  // Sizes.second != nullptr (the size must be communicated to the
  // init/comb/fini functions at runtime).
6214 if (Sizes.second) { 6215 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6216 /*isSigned=*/false); 6217 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6218 CGF, CGM.getContext().getSizeType(), 6219 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6220 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6221 } 6222 } 6223 6224 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6225 SourceLocation Loc, 6226 llvm::Value *ReductionsPtr, 6227 LValue SharedLVal) { 6228 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6229 // *d); 6230 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6231 CGM.IntTy, 6232 /*isSigned=*/true), 6233 ReductionsPtr, 6234 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6235 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6236 return Address( 6237 CGF.EmitRuntimeCall( 6238 OMPBuilder.getOrCreateRuntimeFunction( 6239 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6240 Args), 6241 SharedLVal.getAlignment()); 6242 } 6243 6244 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6245 const OMPTaskDataTy &Data) { 6246 if (!CGF.HaveInsertPoint()) 6247 return; 6248 6249 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6250 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
6251 OMPBuilder.createTaskwait(CGF.Builder); 6252 } else { 6253 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6254 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6255 auto &M = CGM.getModule(); 6256 Address DependenciesArray = Address::invalid(); 6257 llvm::Value *NumOfElements; 6258 std::tie(NumOfElements, DependenciesArray) = 6259 emitDependClause(CGF, Data.Dependences, Loc); 6260 llvm::Value *DepWaitTaskArgs[6]; 6261 if (!Data.Dependences.empty()) { 6262 DepWaitTaskArgs[0] = UpLoc; 6263 DepWaitTaskArgs[1] = ThreadID; 6264 DepWaitTaskArgs[2] = NumOfElements; 6265 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6266 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6267 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6268 6269 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6270 6271 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6272 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6273 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6274 // is specified. 6275 CGF.EmitRuntimeCall( 6276 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6277 DepWaitTaskArgs); 6278 6279 } else { 6280 6281 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6282 // global_tid); 6283 llvm::Value *Args[] = {UpLoc, ThreadID}; 6284 // Ignore return result until untied tasks are supported. 
6285 CGF.EmitRuntimeCall( 6286 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6287 Args); 6288 } 6289 } 6290 6291 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6292 Region->emitUntiedSwitch(CGF); 6293 } 6294 6295 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6296 OpenMPDirectiveKind InnerKind, 6297 const RegionCodeGenTy &CodeGen, 6298 bool HasCancel) { 6299 if (!CGF.HaveInsertPoint()) 6300 return; 6301 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6302 InnerKind != OMPD_critical && 6303 InnerKind != OMPD_master && 6304 InnerKind != OMPD_masked); 6305 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6306 } 6307 6308 namespace { 6309 enum RTCancelKind { 6310 CancelNoreq = 0, 6311 CancelParallel = 1, 6312 CancelLoop = 2, 6313 CancelSections = 3, 6314 CancelTaskgroup = 4 6315 }; 6316 } // anonymous namespace 6317 6318 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6319 RTCancelKind CancelKind = CancelNoreq; 6320 if (CancelRegion == OMPD_parallel) 6321 CancelKind = CancelParallel; 6322 else if (CancelRegion == OMPD_for) 6323 CancelKind = CancelLoop; 6324 else if (CancelRegion == OMPD_sections) 6325 CancelKind = CancelSections; 6326 else { 6327 assert(CancelRegion == OMPD_taskgroup); 6328 CancelKind = CancelTaskgroup; 6329 } 6330 return CancelKind; 6331 } 6332 6333 void CGOpenMPRuntime::emitCancellationPointCall( 6334 CodeGenFunction &CGF, SourceLocation Loc, 6335 OpenMPDirectiveKind CancelRegion) { 6336 if (!CGF.HaveInsertPoint()) 6337 return; 6338 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6339 // global_tid, kmp_int32 cncl_kind); 6340 if (auto *OMPRegionInfo = 6341 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6342 // For 'cancellation point taskgroup', the task region info may not have a 6343 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; the else branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // Initialize every user-defined allocator on entry to the region.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // Destroy the allocators again on exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  // Collect (allocator, allocator-traits) pairs from uses_allocators clauses;
  // allocators without traits need no runtime init/fini.
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE(review): this local 'D' shadows the directive parameter 'D';
      // rename candidate.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

// Emit __kmpc_init_allocator(...) for \p Allocator with \p AllocatorTraits and
// store the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant extent of the traits array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

// Emit __kmpc_destroy_allocator(...) for the handle previously stored in the
// allocator variable by emitUsesAllocatorsInit.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, the ID is a uniquely named constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Return the single non-trivial statement in \p Body, looking through nested
/// compound statements and skipping trivially-ignorable statements; returns
/// nullptr when more than one such statement is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Type-like and pragma-like declarations carry no runtime code.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the single nested directive (if any) to
    // derive the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if
        (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // The remaining kinds are not target-based executable directives; reaching
  // them here violates the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression comes from a nested directive, so evaluate it
      // in the inner-expression capture context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

static
llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                           llvm::Value *DefaultThreadLimitVal) {
  // Compute the thread count implied by a parallel/simd directive nested
  // directly inside the captured statement, folding in 'if' and 'num_threads'
  // clauses and clamping by \p DefaultThreadLimitVal when it is non-null.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause without a name modifier, or one naming
        // 'parallel', applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition -> serialized parallel -> 1 thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit pre-init declarations captured by the clause before
            // evaluating the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the surrounding thread limit, if known.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
                                   DefaultThreadLimitVal
                                   : CGF.Builder.getInt32(0);
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // A constant num_threads only lowers the limit; keep the smaller
          // value.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // The remaining kinds are not target-based executable directives; reaching
  // them here violates the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit pre-init declarations captured by the clause before evaluating
        // the thread_limit expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a nested non-distribute teams directive to look at
      // its single child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' clause without a name modifier, or one naming 'parallel',
      // applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition -> serialized parallel -> 1 thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // The effective limit is min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds are not target-based executable directives; reaching
  // them here violates the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
7293 OMP_MAP_RETURN_PARAM = 0x40, 7294 /// This flag signals that the reference being passed is a pointer to 7295 /// private data. 7296 OMP_MAP_PRIVATE = 0x80, 7297 /// Pass the element to the device by value. 7298 OMP_MAP_LITERAL = 0x100, 7299 /// Implicit map 7300 OMP_MAP_IMPLICIT = 0x200, 7301 /// Close is a hint to the runtime to allocate memory close to 7302 /// the target device. 7303 OMP_MAP_CLOSE = 0x400, 7304 /// 0x800 is reserved for compatibility with XLC. 7305 /// Produce a runtime error if the data is not already allocated. 7306 OMP_MAP_PRESENT = 0x1000, 7307 // Increment and decrement a separate reference counter so that the data 7308 // cannot be unmapped within the associated region. Thus, this flag is 7309 // intended to be used on 'target' and 'target data' directives because they 7310 // are inherently structured. It is not intended to be used on 'target 7311 // enter data' and 'target exit data' directives because they are inherently 7312 // dynamic. 7313 // This is an OpenMP extension for the sake of OpenACC support. 7314 OMP_MAP_OMPX_HOLD = 0x2000, 7315 /// Signal that the runtime library should use args as an array of 7316 /// descriptor_dim pointers and use args_size as dims. Used when we have 7317 /// non-contiguous list items in target update directive 7318 OMP_MAP_NON_CONTIG = 0x100000000000, 7319 /// The 16 MSBs of the flags indicate whether the entry is member of some 7320 /// struct/class. 7321 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7322 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7323 }; 7324 7325 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7326 static unsigned getFlagMemberOffset() { 7327 unsigned Offset = 0; 7328 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7329 Remain = Remain >> 1) 7330 Offset++; 7331 return Offset; 7332 } 7333 7334 /// Class that holds debugging information for a data mapping to be passed to 7335 /// the runtime library. 
7336 class MappingExprInfo { 7337 /// The variable declaration used for the data mapping. 7338 const ValueDecl *MapDecl = nullptr; 7339 /// The original expression used in the map clause, or null if there is 7340 /// none. 7341 const Expr *MapExpr = nullptr; 7342 7343 public: 7344 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7345 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7346 7347 const ValueDecl *getMapDecl() const { return MapDecl; } 7348 const Expr *getMapExpr() const { return MapExpr; } 7349 }; 7350 7351 /// Class that associates information with a base pointer to be passed to the 7352 /// runtime library. 7353 class BasePointerInfo { 7354 /// The base pointer. 7355 llvm::Value *Ptr = nullptr; 7356 /// The base declaration that refers to this device pointer, or null if 7357 /// there is none. 7358 const ValueDecl *DevPtrDecl = nullptr; 7359 7360 public: 7361 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7362 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7363 llvm::Value *operator*() const { return Ptr; } 7364 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7365 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7366 }; 7367 7368 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7369 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7370 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7371 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7372 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7373 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7374 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7375 7376 /// This structure contains combined information generated for mappable 7377 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7378 /// mappers, and non-contiguous information. 
7379 struct MapCombinedInfoTy { 7380 struct StructNonContiguousInfo { 7381 bool IsNonContiguous = false; 7382 MapDimArrayTy Dims; 7383 MapNonContiguousArrayTy Offsets; 7384 MapNonContiguousArrayTy Counts; 7385 MapNonContiguousArrayTy Strides; 7386 }; 7387 MapExprsArrayTy Exprs; 7388 MapBaseValuesArrayTy BasePointers; 7389 MapValuesArrayTy Pointers; 7390 MapValuesArrayTy Sizes; 7391 MapFlagsArrayTy Types; 7392 MapMappersArrayTy Mappers; 7393 StructNonContiguousInfo NonContigInfo; 7394 7395 /// Append arrays in \a CurInfo. 7396 void append(MapCombinedInfoTy &CurInfo) { 7397 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7398 BasePointers.append(CurInfo.BasePointers.begin(), 7399 CurInfo.BasePointers.end()); 7400 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7401 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7402 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7403 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7404 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7405 CurInfo.NonContigInfo.Dims.end()); 7406 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7407 CurInfo.NonContigInfo.Offsets.end()); 7408 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7409 CurInfo.NonContigInfo.Counts.end()); 7410 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7411 CurInfo.NonContigInfo.Strides.end()); 7412 } 7413 }; 7414 7415 /// Map between a struct and the its lowest & highest elements which have been 7416 /// mapped. 
7417 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7418 /// HE(FieldIndex, Pointer)} 7419 struct StructRangeInfoTy { 7420 MapCombinedInfoTy PreliminaryMapData; 7421 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7422 0, Address::invalid()}; 7423 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7424 0, Address::invalid()}; 7425 Address Base = Address::invalid(); 7426 Address LB = Address::invalid(); 7427 bool IsArraySection = false; 7428 bool HasCompleteRecord = false; 7429 }; 7430 7431 private: 7432 /// Kind that defines how a device pointer has to be returned. 7433 struct MapInfo { 7434 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7435 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7436 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7437 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7438 bool ReturnDevicePointer = false; 7439 bool IsImplicit = false; 7440 const ValueDecl *Mapper = nullptr; 7441 const Expr *VarRef = nullptr; 7442 bool ForDeviceAddr = false; 7443 7444 MapInfo() = default; 7445 MapInfo( 7446 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7447 OpenMPMapClauseKind MapType, 7448 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7449 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7450 bool ReturnDevicePointer, bool IsImplicit, 7451 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7452 bool ForDeviceAddr = false) 7453 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7454 MotionModifiers(MotionModifiers), 7455 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7456 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7457 }; 7458 7459 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7460 /// member and there is no map information about it, then emission of that 7461 /// entry is deferred until the whole struct has been processed. 
7462 struct DeferredDevicePtrEntryTy { 7463 const Expr *IE = nullptr; 7464 const ValueDecl *VD = nullptr; 7465 bool ForDeviceAddr = false; 7466 7467 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7468 bool ForDeviceAddr) 7469 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7470 }; 7471 7472 /// The target directive from where the mappable clauses were extracted. It 7473 /// is either a executable directive or a user-defined mapper directive. 7474 llvm::PointerUnion<const OMPExecutableDirective *, 7475 const OMPDeclareMapperDecl *> 7476 CurDir; 7477 7478 /// Function the directive is being generated for. 7479 CodeGenFunction &CGF; 7480 7481 /// Set of all first private variables in the current directive. 7482 /// bool data is set to true if the variable is implicitly marked as 7483 /// firstprivate, false otherwise. 7484 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7485 7486 /// Map between device pointer declarations and their expression components. 7487 /// The key value for declarations in 'this' is null. 7488 llvm::DenseMap< 7489 const ValueDecl *, 7490 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7491 DevPointersMap; 7492 7493 /// Map between lambda declarations and their map type. 7494 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7495 7496 llvm::Value *getExprTypeSize(const Expr *E) const { 7497 QualType ExprTy = E->getType().getCanonicalType(); 7498 7499 // Calculate the size for array shaping expression. 
7500 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7501 llvm::Value *Size = 7502 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7503 for (const Expr *SE : OAE->getDimensions()) { 7504 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7505 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7506 CGF.getContext().getSizeType(), 7507 SE->getExprLoc()); 7508 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7509 } 7510 return Size; 7511 } 7512 7513 // Reference types are ignored for mapping purposes. 7514 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7515 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7516 7517 // Given that an array section is considered a built-in type, we need to 7518 // do the calculation based on the length of the section instead of relying 7519 // on CGF.getTypeSize(E->getType()). 7520 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7521 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7522 OAE->getBase()->IgnoreParenImpCasts()) 7523 .getCanonicalType(); 7524 7525 // If there is no length associated with the expression and lower bound is 7526 // not specified too, that means we are using the whole length of the 7527 // base. 7528 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7529 !OAE->getLowerBound()) 7530 return CGF.getTypeSize(BaseTy); 7531 7532 llvm::Value *ElemSize; 7533 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7534 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7535 } else { 7536 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7537 assert(ATy && "Expecting array type if not a pointer type."); 7538 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7539 } 7540 7541 // If we don't have a length at this point, that is because we have an 7542 // array section with a single element. 
7543 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7544 return ElemSize; 7545 7546 if (const Expr *LenExpr = OAE->getLength()) { 7547 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7548 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7549 CGF.getContext().getSizeType(), 7550 LenExpr->getExprLoc()); 7551 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7552 } 7553 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7554 OAE->getLowerBound() && "expected array_section[lb:]."); 7555 // Size = sizetype - lb * elemtype; 7556 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7557 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7558 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7559 CGF.getContext().getSizeType(), 7560 OAE->getLowerBound()->getExprLoc()); 7561 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7562 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7563 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7564 LengthVal = CGF.Builder.CreateSelect( 7565 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7566 return LengthVal; 7567 } 7568 return CGF.getTypeSize(ExprTy); 7569 } 7570 7571 /// Return the corresponding bits for a given map clause modifier. Add 7572 /// a flag marking the map as a pointer if requested. Add a flag marking the 7573 /// map as the first one of a series of maps that relate to the same map 7574 /// expression. 7575 OpenMPOffloadMappingFlags getMapTypeBits( 7576 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7577 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7578 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7579 OpenMPOffloadMappingFlags Bits = 7580 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7581 switch (MapType) { 7582 case OMPC_MAP_alloc: 7583 case OMPC_MAP_release: 7584 // alloc and release is the default behavior in the runtime library, i.e. 7585 // if we don't pass any bits alloc/release that is what the runtime is 7586 // going to do. Therefore, we don't need to signal anything for these two 7587 // type modifiers. 7588 break; 7589 case OMPC_MAP_to: 7590 Bits |= OMP_MAP_TO; 7591 break; 7592 case OMPC_MAP_from: 7593 Bits |= OMP_MAP_FROM; 7594 break; 7595 case OMPC_MAP_tofrom: 7596 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7597 break; 7598 case OMPC_MAP_delete: 7599 Bits |= OMP_MAP_DELETE; 7600 break; 7601 case OMPC_MAP_unknown: 7602 llvm_unreachable("Unexpected map type!"); 7603 } 7604 if (AddPtrFlag) 7605 Bits |= OMP_MAP_PTR_AND_OBJ; 7606 if (AddIsTargetParamFlag) 7607 Bits |= OMP_MAP_TARGET_PARAM; 7608 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7609 Bits |= OMP_MAP_ALWAYS; 7610 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7611 Bits |= OMP_MAP_CLOSE; 7612 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7613 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7614 Bits |= OMP_MAP_PRESENT; 7615 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7616 Bits |= OMP_MAP_OMPX_HOLD; 7617 if (IsNonContiguous) 7618 Bits |= OMP_MAP_NON_CONTIG; 7619 return Bits; 7620 } 7621 7622 /// Return true if the provided expression is a final array section. A 7623 /// final array section, is one whose length can't be proved to be one. 7624 bool isFinalArraySectionExpression(const Expr *E) const { 7625 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7626 7627 // It is not an array section and therefore not a unity-size one. 7628 if (!OASE) 7629 return false; 7630 7631 // An array section with no colon always refer to a single element. 
7632 if (OASE->getColonLocFirst().isInvalid()) 7633 return false; 7634 7635 const Expr *Length = OASE->getLength(); 7636 7637 // If we don't have a length we have to check if the array has size 1 7638 // for this dimension. Also, we should always expect a length if the 7639 // base type is pointer. 7640 if (!Length) { 7641 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7642 OASE->getBase()->IgnoreParenImpCasts()) 7643 .getCanonicalType(); 7644 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7645 return ATy->getSize().getSExtValue() != 1; 7646 // If we don't have a constant dimension length, we have to consider 7647 // the current section as having any size, so it is not necessarily 7648 // unitary. If it happen to be unity size, that's user fault. 7649 return true; 7650 } 7651 7652 // Check if the length evaluates to 1. 7653 Expr::EvalResult Result; 7654 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7655 return true; // Can have more that size 1. 7656 7657 llvm::APSInt ConstLength = Result.Val.getInt(); 7658 return ConstLength.getSExtValue() != 1; 7659 } 7660 7661 /// Generate the base pointers, section pointers, sizes, map type bits, and 7662 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7663 /// map type, map or motion modifiers, and expression components. 7664 /// \a IsFirstComponent should be set to true if the provided set of 7665 /// components is the first associated with a capture. 
7666 void generateInfoForComponentList( 7667 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7668 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7669 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7670 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7671 bool IsFirstComponentList, bool IsImplicit, 7672 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7673 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7674 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7675 OverlappedElements = llvm::None) const { 7676 // The following summarizes what has to be generated for each map and the 7677 // types below. The generated information is expressed in this order: 7678 // base pointer, section pointer, size, flags 7679 // (to add to the ones that come from the map type and modifier). 7680 // 7681 // double d; 7682 // int i[100]; 7683 // float *p; 7684 // 7685 // struct S1 { 7686 // int i; 7687 // float f[50]; 7688 // } 7689 // struct S2 { 7690 // int i; 7691 // float f[50]; 7692 // S1 s; 7693 // double *p; 7694 // struct S2 *ps; 7695 // int &ref; 7696 // } 7697 // S2 s; 7698 // S2 *ps; 7699 // 7700 // map(d) 7701 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7702 // 7703 // map(i) 7704 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7705 // 7706 // map(i[1:23]) 7707 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7708 // 7709 // map(p) 7710 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7711 // 7712 // map(p[1:24]) 7713 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7714 // in unified shared memory mode or for local pointers 7715 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7716 // 7717 // map(s) 7718 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7719 // 7720 // map(s.i) 7721 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7722 // 7723 // map(s.s.f) 7724 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7725 // 7726 // map(s.p) 7727 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7728 // 7729 // map(to: s.p[:22]) 7730 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7731 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7732 // &(s.p), &(s.p[0]), 22*sizeof(double), 7733 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7734 // (*) alloc space for struct members, only this is a target parameter 7735 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7736 // optimizes this entry out, same in the examples below) 7737 // (***) map the pointee (map: to) 7738 // 7739 // map(to: s.ref) 7740 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7741 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7742 // (*) alloc space for struct members, only this is a target parameter 7743 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7744 // optimizes this entry out, same in the examples below) 7745 // (***) map the pointee (map: to) 7746 // 7747 // map(s.ps) 7748 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7749 // 7750 // map(from: s.ps->s.i) 7751 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7752 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7753 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7754 // 7755 // map(to: s.ps->ps) 7756 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7757 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7758 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7759 // 7760 // map(s.ps->ps->ps) 7761 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7762 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7763 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7764 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7765 // 7766 // map(to: s.ps->ps->s.f[:22]) 7767 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7768 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7769 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7770 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7771 // 7772 // map(ps) 7773 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7774 // 7775 // map(ps->i) 7776 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7777 // 7778 // map(ps->s.f) 7779 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7780 // 7781 // map(from: ps->p) 7782 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7783 // 7784 // map(to: ps->p[:22]) 7785 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7786 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7787 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7788 // 7789 // map(ps->ps) 7790 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7791 // 7792 // map(from: ps->ps->s.i) 7793 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7794 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7795 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7796 // 7797 // map(from: ps->ps->ps) 7798 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7799 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7800 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7801 // 7802 // map(ps->ps->ps->ps) 7803 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7804 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7805 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7806 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7807 // 7808 // map(to: ps->ps->ps->s.f[:22]) 7809 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7810 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7811 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7812 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7813 // 7814 // map(to: s.f[:22]) map(from: s.p[:33]) 7815 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7816 // sizeof(double*) (**), TARGET_PARAM 7817 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7818 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7819 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7820 // (*) allocate contiguous space needed to fit all mapped members even if 7821 // we allocate space for members not mapped (in this example, 7822 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7823 // them as well because they fall between &s.f[0] and &s.p) 7824 // 7825 // map(from: s.f[:22]) map(to: ps->p[:33]) 7826 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7827 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7828 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7829 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7830 // (*) the struct this entry pertains to is the 2nd element in the list of 7831 // arguments, hence MEMBER_OF(2) 7832 // 7833 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7834 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7835 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7836 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7837 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7838 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7839 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7840 // (*) the struct this entry pertains to is the 4th element in the list 7841 // of arguments, hence MEMBER_OF(4) 7842 7843 // Track if the map information being generated is the first for a capture. 7844 bool IsCaptureFirstInfo = IsFirstComponentList; 7845 // When the variable is on a declare target link or in a to clause with 7846 // unified memory, a reference is needed to hold the host/device address 7847 // of the variable. 7848 bool RequiresReference = false; 7849 7850 // Scan the components from the base to the complete expression. 
7851 auto CI = Components.rbegin(); 7852 auto CE = Components.rend(); 7853 auto I = CI; 7854 7855 // Track if the map information being generated is the first for a list of 7856 // components. 7857 bool IsExpressionFirstInfo = true; 7858 bool FirstPointerInComplexData = false; 7859 Address BP = Address::invalid(); 7860 const Expr *AssocExpr = I->getAssociatedExpression(); 7861 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7862 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7863 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7864 7865 if (isa<MemberExpr>(AssocExpr)) { 7866 // The base is the 'this' pointer. The content of the pointer is going 7867 // to be the base of the field being mapped. 7868 BP = CGF.LoadCXXThisAddress(); 7869 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7870 (OASE && 7871 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7872 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7873 } else if (OAShE && 7874 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7875 BP = Address( 7876 CGF.EmitScalarExpr(OAShE->getBase()), 7877 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7878 } else { 7879 // The base is the reference to the variable. 7880 // BP = &Var. 7881 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7882 if (const auto *VD = 7883 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7884 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7885 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7886 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7887 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7888 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7889 RequiresReference = true; 7890 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7891 } 7892 } 7893 } 7894 7895 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7896 // the last component), the base has to be the pointer itself, not its 7897 // reference. References are ignored for mapping purposes. 7898 QualType Ty = 7899 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7900 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7901 // No need to generate individual map information for the pointer, it 7902 // can be associated with the combined storage if shared memory mode is 7903 // active or the base declaration is not global variable. 7904 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7905 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7906 !VD || VD->hasLocalStorage()) 7907 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7908 else 7909 FirstPointerInComplexData = true; 7910 ++I; 7911 } 7912 } 7913 7914 // Track whether a component of the list should be marked as MEMBER_OF some 7915 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7916 // in a component list should be marked as MEMBER_OF, all subsequent entries 7917 // do not belong to the base struct. E.g. 7918 // struct S2 s; 7919 // s.ps->ps->ps->f[:] 7920 // (1) (2) (3) (4) 7921 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7922 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7923 // is the pointee of ps(2) which is not member of struct s, so it should not 7924 // be marked as such (it is still PTR_AND_OBJ). 7925 // The variable is initialized to false so that PTR_AND_OBJ entries which 7926 // are not struct members are not considered (e.g. array of pointers to 7927 // data). 7928 bool ShouldBeMemberOf = false; 7929 7930 // Variable keeping track of whether or not we have encountered a component 7931 // in the component list which is a member expression. 
Useful when we have a 7932 // pointer or a final array section, in which case it is the previous 7933 // component in the list which tells us whether we have a member expression. 7934 // E.g. X.f[:] 7935 // While processing the final array section "[:]" it is "f" which tells us 7936 // whether we are dealing with a member of a declared struct. 7937 const MemberExpr *EncounteredME = nullptr; 7938 7939 // Track for the total number of dimension. Start from one for the dummy 7940 // dimension. 7941 uint64_t DimSize = 1; 7942 7943 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7944 bool IsPrevMemberReference = false; 7945 7946 for (; I != CE; ++I) { 7947 // If the current component is member of a struct (parent struct) mark it. 7948 if (!EncounteredME) { 7949 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7950 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7951 // as MEMBER_OF the parent struct. 7952 if (EncounteredME) { 7953 ShouldBeMemberOf = true; 7954 // Do not emit as complex pointer if this is actually not array-like 7955 // expression. 7956 if (FirstPointerInComplexData) { 7957 QualType Ty = std::prev(I) 7958 ->getAssociatedDeclaration() 7959 ->getType() 7960 .getNonReferenceType(); 7961 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7962 FirstPointerInComplexData = false; 7963 } 7964 } 7965 } 7966 7967 auto Next = std::next(I); 7968 7969 // We need to generate the addresses and sizes if this is the last 7970 // component, if the component is a pointer or if it is an array section 7971 // whose length can't be proved to be one. If this is a pointer, it 7972 // becomes the base address for the following components. 7973 7974 // A final array section, is one whose length can't be proved to be one. 7975 // If the map item is non-contiguous then we don't treat any array section 7976 // as final array section. 
7977 bool IsFinalArraySection = 7978 !IsNonContiguous && 7979 isFinalArraySectionExpression(I->getAssociatedExpression()); 7980 7981 // If we have a declaration for the mapping use that, otherwise use 7982 // the base declaration of the map clause. 7983 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7984 ? I->getAssociatedDeclaration() 7985 : BaseDecl; 7986 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7987 : MapExpr; 7988 7989 // Get information on whether the element is a pointer. Have to do a 7990 // special treatment for array sections given that they are built-in 7991 // types. 7992 const auto *OASE = 7993 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7994 const auto *OAShE = 7995 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7996 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7997 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7998 bool IsPointer = 7999 OAShE || 8000 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8001 .getCanonicalType() 8002 ->isAnyPointerType()) || 8003 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8004 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8005 MapDecl && 8006 MapDecl->getType()->isLValueReferenceType(); 8007 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8008 8009 if (OASE) 8010 ++DimSize; 8011 8012 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8013 IsFinalArraySection) { 8014 // If this is not the last component, we expect the pointer to be 8015 // associated with an array expression or member expression. 
8016 assert((Next == CE || 8017 isa<MemberExpr>(Next->getAssociatedExpression()) || 8018 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8019 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8020 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8021 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8022 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8023 "Unexpected expression"); 8024 8025 Address LB = Address::invalid(); 8026 Address LowestElem = Address::invalid(); 8027 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8028 const MemberExpr *E) { 8029 const Expr *BaseExpr = E->getBase(); 8030 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8031 // scalar. 8032 LValue BaseLV; 8033 if (E->isArrow()) { 8034 LValueBaseInfo BaseInfo; 8035 TBAAAccessInfo TBAAInfo; 8036 Address Addr = 8037 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8038 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8039 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8040 } else { 8041 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8042 } 8043 return BaseLV; 8044 }; 8045 if (OAShE) { 8046 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8047 CGF.getContext().getTypeAlignInChars( 8048 OAShE->getBase()->getType())); 8049 } else if (IsMemberReference) { 8050 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8051 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8052 LowestElem = CGF.EmitLValueForFieldInitialization( 8053 BaseLVal, cast<FieldDecl>(MapDecl)) 8054 .getAddress(CGF); 8055 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8056 .getAddress(CGF); 8057 } else { 8058 LowestElem = LB = 8059 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8060 .getAddress(CGF); 8061 } 8062 8063 // If this component is a pointer inside the base struct then we don't 8064 // need to create any entry for it - it will be combined with the object 8065 // it is 
pointing to into a single PTR_AND_OBJ entry. 8066 bool IsMemberPointerOrAddr = 8067 EncounteredME && 8068 (((IsPointer || ForDeviceAddr) && 8069 I->getAssociatedExpression() == EncounteredME) || 8070 (IsPrevMemberReference && !IsPointer) || 8071 (IsMemberReference && Next != CE && 8072 !Next->getAssociatedExpression()->getType()->isPointerType())); 8073 if (!OverlappedElements.empty() && Next == CE) { 8074 // Handle base element with the info for overlapped elements. 8075 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8076 assert(!IsPointer && 8077 "Unexpected base element with the pointer type."); 8078 // Mark the whole struct as the struct that requires allocation on the 8079 // device. 8080 PartialStruct.LowestElem = {0, LowestElem}; 8081 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8082 I->getAssociatedExpression()->getType()); 8083 Address HB = CGF.Builder.CreateConstGEP( 8084 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8085 CGF.VoidPtrTy), 8086 TypeSize.getQuantity() - 1); 8087 PartialStruct.HighestElem = { 8088 std::numeric_limits<decltype( 8089 PartialStruct.HighestElem.first)>::max(), 8090 HB}; 8091 PartialStruct.Base = BP; 8092 PartialStruct.LB = LB; 8093 assert( 8094 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8095 "Overlapped elements must be used only once for the variable."); 8096 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8097 // Emit data for non-overlapped data. 8098 OpenMPOffloadMappingFlags Flags = 8099 OMP_MAP_MEMBER_OF | 8100 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8101 /*AddPtrFlag=*/false, 8102 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8103 llvm::Value *Size = nullptr; 8104 // Do bitcopy of all non-overlapped structure elements. 
8105 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8106 Component : OverlappedElements) { 8107 Address ComponentLB = Address::invalid(); 8108 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8109 Component) { 8110 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8111 const auto *FD = dyn_cast<FieldDecl>(VD); 8112 if (FD && FD->getType()->isLValueReferenceType()) { 8113 const auto *ME = 8114 cast<MemberExpr>(MC.getAssociatedExpression()); 8115 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8116 ComponentLB = 8117 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8118 .getAddress(CGF); 8119 } else { 8120 ComponentLB = 8121 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8122 .getAddress(CGF); 8123 } 8124 Size = CGF.Builder.CreatePtrDiff( 8125 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8126 CGF.EmitCastToVoidPtr(LB.getPointer())); 8127 break; 8128 } 8129 } 8130 assert(Size && "Failed to determine structure size"); 8131 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8132 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8133 CombinedInfo.Pointers.push_back(LB.getPointer()); 8134 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8135 Size, CGF.Int64Ty, /*isSigned=*/true)); 8136 CombinedInfo.Types.push_back(Flags); 8137 CombinedInfo.Mappers.push_back(nullptr); 8138 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8139 : 1); 8140 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8141 } 8142 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8143 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8144 CombinedInfo.Pointers.push_back(LB.getPointer()); 8145 Size = CGF.Builder.CreatePtrDiff( 8146 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8147 CGF.EmitCastToVoidPtr(LB.getPointer())); 8148 CombinedInfo.Sizes.push_back( 8149 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8150 CombinedInfo.Types.push_back(Flags); 8151 CombinedInfo.Mappers.push_back(nullptr); 8152 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8153 : 1); 8154 break; 8155 } 8156 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8157 if (!IsMemberPointerOrAddr || 8158 (Next == CE && MapType != OMPC_MAP_unknown)) { 8159 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8160 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8161 CombinedInfo.Pointers.push_back(LB.getPointer()); 8162 CombinedInfo.Sizes.push_back( 8163 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8164 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8165 : 1); 8166 8167 // If Mapper is valid, the last component inherits the mapper. 8168 bool HasMapper = Mapper && Next == CE; 8169 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8170 8171 // We need to add a pointer flag for each map that comes from the 8172 // same expression except for the first one. We also need to signal 8173 // this map is the first one that relates with the current capture 8174 // (there is a set of entries for each capture). 
8175 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8176 MapType, MapModifiers, MotionModifiers, IsImplicit, 8177 !IsExpressionFirstInfo || RequiresReference || 8178 FirstPointerInComplexData || IsMemberReference, 8179 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8180 8181 if (!IsExpressionFirstInfo || IsMemberReference) { 8182 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8183 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8184 if (IsPointer || (IsMemberReference && Next != CE)) 8185 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8186 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8187 8188 if (ShouldBeMemberOf) { 8189 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8190 // should be later updated with the correct value of MEMBER_OF. 8191 Flags |= OMP_MAP_MEMBER_OF; 8192 // From now on, all subsequent PTR_AND_OBJ entries should not be 8193 // marked as MEMBER_OF. 8194 ShouldBeMemberOf = false; 8195 } 8196 } 8197 8198 CombinedInfo.Types.push_back(Flags); 8199 } 8200 8201 // If we have encountered a member expression so far, keep track of the 8202 // mapped member. If the parent is "*this", then the value declaration 8203 // is nullptr. 
8204 if (EncounteredME) { 8205 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8206 unsigned FieldIndex = FD->getFieldIndex(); 8207 8208 // Update info about the lowest and highest elements for this struct 8209 if (!PartialStruct.Base.isValid()) { 8210 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8211 if (IsFinalArraySection) { 8212 Address HB = 8213 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8214 .getAddress(CGF); 8215 PartialStruct.HighestElem = {FieldIndex, HB}; 8216 } else { 8217 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8218 } 8219 PartialStruct.Base = BP; 8220 PartialStruct.LB = BP; 8221 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8222 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8223 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8224 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8225 } 8226 } 8227 8228 // Need to emit combined struct for array sections. 8229 if (IsFinalArraySection || IsNonContiguous) 8230 PartialStruct.IsArraySection = true; 8231 8232 // If we have a final array section, we are done with this expression. 8233 if (IsFinalArraySection) 8234 break; 8235 8236 // The pointer becomes the base for the next element. 8237 if (Next != CE) 8238 BP = IsMemberReference ? LowestElem : LB; 8239 8240 IsExpressionFirstInfo = false; 8241 IsCaptureFirstInfo = false; 8242 FirstPointerInComplexData = false; 8243 IsPrevMemberReference = IsMemberReference; 8244 } else if (FirstPointerInComplexData) { 8245 QualType Ty = Components.rbegin() 8246 ->getAssociatedDeclaration() 8247 ->getType() 8248 .getNonReferenceType(); 8249 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8250 FirstPointerInComplexData = false; 8251 } 8252 } 8253 // If ran into the whole component - allocate the space for the whole 8254 // record. 
8255 if (!EncounteredME) 8256 PartialStruct.HasCompleteRecord = true; 8257 8258 if (!IsNonContiguous) 8259 return; 8260 8261 const ASTContext &Context = CGF.getContext(); 8262 8263 // For supporting stride in array section, we need to initialize the first 8264 // dimension size as 1, first offset as 0, and first count as 1 8265 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8266 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8267 MapValuesArrayTy CurStrides; 8268 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8269 uint64_t ElementTypeSize; 8270 8271 // Collect Size information for each dimension and get the element size as 8272 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8273 // should be [10, 10] and the first stride is 4 btyes. 8274 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8275 Components) { 8276 const Expr *AssocExpr = Component.getAssociatedExpression(); 8277 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8278 8279 if (!OASE) 8280 continue; 8281 8282 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8283 auto *CAT = Context.getAsConstantArrayType(Ty); 8284 auto *VAT = Context.getAsVariableArrayType(Ty); 8285 8286 // We need all the dimension size except for the last dimension. 8287 assert((VAT || CAT || &Component == &*Components.begin()) && 8288 "Should be either ConstantArray or VariableArray if not the " 8289 "first Component"); 8290 8291 // Get element size if CurStrides is empty. 
8292 if (CurStrides.empty()) { 8293 const Type *ElementType = nullptr; 8294 if (CAT) 8295 ElementType = CAT->getElementType().getTypePtr(); 8296 else if (VAT) 8297 ElementType = VAT->getElementType().getTypePtr(); 8298 else 8299 assert(&Component == &*Components.begin() && 8300 "Only expect pointer (non CAT or VAT) when this is the " 8301 "first Component"); 8302 // If ElementType is null, then it means the base is a pointer 8303 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8304 // for next iteration. 8305 if (ElementType) { 8306 // For the case that having pointer as base, we need to remove one 8307 // level of indirection. 8308 if (&Component != &*Components.begin()) 8309 ElementType = ElementType->getPointeeOrArrayElementType(); 8310 ElementTypeSize = 8311 Context.getTypeSizeInChars(ElementType).getQuantity(); 8312 CurStrides.push_back( 8313 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8314 } 8315 } 8316 // Get dimension value except for the last dimension since we don't need 8317 // it. 8318 if (DimSizes.size() < Components.size() - 1) { 8319 if (CAT) 8320 DimSizes.push_back(llvm::ConstantInt::get( 8321 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8322 else if (VAT) 8323 DimSizes.push_back(CGF.Builder.CreateIntCast( 8324 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8325 /*IsSigned=*/false)); 8326 } 8327 } 8328 8329 // Skip the dummy dimension since we have already have its information. 8330 auto DI = DimSizes.begin() + 1; 8331 // Product of dimension. 8332 llvm::Value *DimProd = 8333 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8334 8335 // Collect info for non-contiguous. Notice that offset, count, and stride 8336 // are only meaningful for array-section, so we insert a null for anything 8337 // other than array-section. 8338 // Also, the size of offset, count, and stride are not the same as 8339 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8340 // count, and stride are the same as the number of non-contiguous 8341 // declaration in target update to/from clause. 8342 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8343 Components) { 8344 const Expr *AssocExpr = Component.getAssociatedExpression(); 8345 8346 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8347 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8348 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8349 /*isSigned=*/false); 8350 CurOffsets.push_back(Offset); 8351 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8352 CurStrides.push_back(CurStrides.back()); 8353 continue; 8354 } 8355 8356 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8357 8358 if (!OASE) 8359 continue; 8360 8361 // Offset 8362 const Expr *OffsetExpr = OASE->getLowerBound(); 8363 llvm::Value *Offset = nullptr; 8364 if (!OffsetExpr) { 8365 // If offset is absent, then we just set it to zero. 8366 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8367 } else { 8368 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8369 CGF.Int64Ty, 8370 /*isSigned=*/false); 8371 } 8372 CurOffsets.push_back(Offset); 8373 8374 // Count 8375 const Expr *CountExpr = OASE->getLength(); 8376 llvm::Value *Count = nullptr; 8377 if (!CountExpr) { 8378 // In Clang, once a high dimension is an array section, we construct all 8379 // the lower dimension as array section, however, for case like 8380 // arr[0:2][2], Clang construct the inner dimension as an array section 8381 // but it actually is not in an array section form according to spec. 8382 if (!OASE->getColonLocFirst().isValid() && 8383 !OASE->getColonLocSecond().isValid()) { 8384 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8385 } else { 8386 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8387 // When the length is absent it defaults to ⌈(size − 8388 // lower-bound)/stride⌉, where size is the size of the array 8389 // dimension. 8390 const Expr *StrideExpr = OASE->getStride(); 8391 llvm::Value *Stride = 8392 StrideExpr 8393 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8394 CGF.Int64Ty, /*isSigned=*/false) 8395 : nullptr; 8396 if (Stride) 8397 Count = CGF.Builder.CreateUDiv( 8398 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8399 else 8400 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8401 } 8402 } else { 8403 Count = CGF.EmitScalarExpr(CountExpr); 8404 } 8405 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8406 CurCounts.push_back(Count); 8407 8408 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8409 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8410 // Offset Count Stride 8411 // D0 0 1 4 (int) <- dummy dimension 8412 // D1 0 2 8 (2 * (1) * 4) 8413 // D2 1 2 20 (1 * (1 * 5) * 4) 8414 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8415 const Expr *StrideExpr = OASE->getStride(); 8416 llvm::Value *Stride = 8417 StrideExpr 8418 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8419 CGF.Int64Ty, /*isSigned=*/false) 8420 : nullptr; 8421 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8422 if (Stride) 8423 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8424 else 8425 CurStrides.push_back(DimProd); 8426 if (DI != DimSizes.end()) 8427 ++DI; 8428 } 8429 8430 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8431 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8432 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8433 } 8434 8435 /// Return the adjusted map modifiers if the declaration a capture refers to 8436 /// appears in a first-private clause. This is expected to be used only with 8437 /// directives that start with 'target'. 
8438 MappableExprsHandler::OpenMPOffloadMappingFlags 8439 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8440 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8441 8442 // A first private variable captured by reference will use only the 8443 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8444 // declaration is known as first-private in this handler. 8445 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8446 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8447 return MappableExprsHandler::OMP_MAP_TO | 8448 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8449 return MappableExprsHandler::OMP_MAP_PRIVATE | 8450 MappableExprsHandler::OMP_MAP_TO; 8451 } /* Captured lambdas that appear in an explicit map clause reuse the user-specified map type instead of the default below. */ 8452 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8453 if (I != LambdasMap.end()) 8454 // for map(to: lambda): using user specified map type. 8455 return getMapTypeBits( 8456 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8457 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8458 /*AddPtrFlag=*/false, 8459 /*AddIsTargetParamFlag=*/false, 8460 /*isNonContiguous=*/false); /* Default for any other capture: copy both to and from the device. */ 8461 return MappableExprsHandler::OMP_MAP_TO | 8462 MappableExprsHandler::OMP_MAP_FROM; 8463 } 8464 8465 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8466 // Rotate by getFlagMemberOffset() bits. /* The MEMBER_OF field stores Position + 1, so a raw field value of 0 presumably means "not a member" -- confirm against getFlagMemberOffset()/the runtime flag layout. */ 8467 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8468 << getFlagMemberOffset()); 8469 } 8470 8471 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8472 OpenMPOffloadMappingFlags MemberOfFlag) { 8473 // If the entry is PTR_AND_OBJ but has not been marked with the special 8474 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8475 // marked as MEMBER_OF.
8476 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8477 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8478 return; 8479 8480 // Reset the placeholder value to prepare the flag for the assignment of the 8481 // proper MEMBER_OF value. 8482 Flags &= ~OMP_MAP_MEMBER_OF; 8483 Flags |= MemberOfFlag; 8484 } 8485 /* Flatten RD into Layout following the LLVM struct layout: slot non-empty non-virtual bases, then virtual bases, then named fields, all keyed by LLVM field index; recurses into base subobjects. */ 8486 void getPlainLayout(const CXXRecordDecl *RD, 8487 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8488 bool AsBase) const { 8489 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8490 8491 llvm::StructType *St = 8492 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8493 8494 unsigned NumElements = St->getNumElements(); 8495 llvm::SmallVector< 8496 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8497 RecordLayout(NumElements); 8498 8499 // Fill bases. 8500 for (const auto &I : RD->bases()) { 8501 if (I.isVirtual()) 8502 continue; 8503 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8504 // Ignore empty bases. 8505 if (Base->isEmpty() || CGF.getContext() 8506 .getASTRecordLayout(Base) 8507 .getNonVirtualSize() 8508 .isZero()) 8509 continue; 8510 8511 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8512 RecordLayout[FieldIndex] = Base; 8513 } 8514 // Fill in virtual bases. 8515 for (const auto &I : RD->vbases()) { 8516 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8517 // Ignore empty bases. 8518 if (Base->isEmpty()) 8519 continue; 8520 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); /* Skip if this slot was already claimed by the non-virtual-base pass above. */ 8521 if (RecordLayout[FieldIndex]) 8522 continue; 8523 RecordLayout[FieldIndex] = Base; 8524 } 8525 // Fill in all the fields. 8526 assert(!RD->isUnion() && "Unexpected union."); 8527 for (const auto *Field : RD->fields()) { 8528 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8529 // will fill in later.)
8530 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8531 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8532 RecordLayout[FieldIndex] = Field; 8533 } 8534 } /* Emit the collected slots in layout order: recurse into base subobjects, append fields directly. */ 8535 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8536 &Data : RecordLayout) { 8537 if (Data.isNull()) 8538 continue; 8539 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8540 getPlainLayout(Base, Layout, /*AsBase=*/true); 8541 else 8542 Layout.push_back(Data.get<const FieldDecl *>()); 8543 } 8544 } 8545 8546 /// Generate all the base pointers, section pointers, sizes, map types, and 8547 /// mappers for the extracted mappable expressions (all included in \a 8548 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8549 /// pair of the relevant declaration and index where it occurs is appended to 8550 /// the device pointers info array. 8551 void generateAllInfoForClauses( 8552 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8553 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8554 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8555 // We have to process the component lists that relate with the same 8556 // declaration in a single chunk so that we can generate the map flags 8557 // correctly. Therefore, we organize all lists in a map. /* Total doubles as the bucket count for the per-declaration SmallVector below. */ 8558 enum MapKind { Present, Allocs, Other, Total }; 8559 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8560 SmallVector<SmallVector<MapInfo, 8>, 4>> 8561 Info; 8562 8563 // Helper function to fill the information map for the different supported 8564 // clauses.
8565 auto &&InfoGen = 8566 [&Info, &SkipVarSet]( 8567 const ValueDecl *D, MapKind Kind, 8568 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8569 OpenMPMapClauseKind MapType, 8570 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8571 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8572 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8573 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8574 if (SkipVarSet.contains(D)) 8575 return; 8576 auto It = Info.find(D); 8577 if (It == Info.end()) 8578 It = Info 8579 .insert(std::make_pair( 8580 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8581 .first; 8582 It->second[Kind].emplace_back( 8583 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8584 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8585 }; 8586 8587 for (const auto *Cl : Clauses) { 8588 const auto *C = dyn_cast<OMPMapClause>(Cl); 8589 if (!C) 8590 continue; 8591 MapKind Kind = Other; 8592 if (llvm::is_contained(C->getMapTypeModifiers(), 8593 OMPC_MAP_MODIFIER_present)) 8594 Kind = Present; 8595 else if (C->getMapType() == OMPC_MAP_alloc) 8596 Kind = Allocs; 8597 const auto *EI = C->getVarRefs().begin(); 8598 for (const auto L : C->component_lists()) { 8599 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8600 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8601 C->getMapTypeModifiers(), llvm::None, 8602 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8603 E); 8604 ++EI; 8605 } 8606 } 8607 for (const auto *Cl : Clauses) { 8608 const auto *C = dyn_cast<OMPToClause>(Cl); 8609 if (!C) 8610 continue; 8611 MapKind Kind = Other; 8612 if (llvm::is_contained(C->getMotionModifiers(), 8613 OMPC_MOTION_MODIFIER_present)) 8614 Kind = Present; 8615 const auto *EI = C->getVarRefs().begin(); 8616 for (const auto L : C->component_lists()) { 8617 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8618 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8619 C->isImplicit(), std::get<2>(L), *EI); 8620 ++EI; 8621 } 8622 } 8623 for (const auto *Cl : Clauses) { 8624 const auto *C = dyn_cast<OMPFromClause>(Cl); 8625 if (!C) 8626 continue; 8627 MapKind Kind = Other; 8628 if (llvm::is_contained(C->getMotionModifiers(), 8629 OMPC_MOTION_MODIFIER_present)) 8630 Kind = Present; 8631 const auto *EI = C->getVarRefs().begin(); 8632 for (const auto L : C->component_lists()) { 8633 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8634 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8635 C->isImplicit(), std::get<2>(L), *EI); 8636 ++EI; 8637 } 8638 } 8639 8640 // Look at the use_device_ptr clause information and mark the existing map 8641 // entries as such. If there is no map information for an entry in the 8642 // use_device_ptr list, we create one with map type 'alloc' and zero size 8643 // section. It is the user fault if that was not mapped before. If there is 8644 // no map information and the pointer is a struct member, then we defer the 8645 // emission of that entry until the whole struct has been processed. 
8646 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8647 SmallVector<DeferredDevicePtrEntryTy, 4>> 8648 DeferredInfo; 8649 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8650 8651 for (const auto *Cl : Clauses) { 8652 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8653 if (!C) 8654 continue; 8655 for (const auto L : C->component_lists()) { 8656 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8657 std::get<1>(L); 8658 assert(!Components.empty() && 8659 "Not expecting empty list of components!"); 8660 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8661 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8662 const Expr *IE = Components.back().getAssociatedExpression(); 8663 // If the first component is a member expression, we have to look into 8664 // 'this', which maps to null in the map of map information. Otherwise 8665 // look directly for the information. 8666 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8667 8668 // We potentially have map information for this declaration already. 8669 // Look for the first set of components that refer to it. 8670 if (It != Info.end()) { 8671 bool Found = false; 8672 for (auto &Data : It->second) { 8673 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8674 return MI.Components.back().getAssociatedDeclaration() == VD; 8675 }); 8676 // If we found a map entry, signal that the pointer has to be 8677 // returned and move on to the next declaration. Exclude cases where 8678 // the base pointer is mapped as array subscript, array section or 8679 // array shaping. The base address is passed as a pointer to base in 8680 // this case and cannot be used as a base for use_device_ptr list 8681 // item. 
8682 if (CI != Data.end()) { 8683 auto PrevCI = std::next(CI->Components.rbegin()); 8684 const auto *VarD = dyn_cast<VarDecl>(VD); 8685 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8686 isa<MemberExpr>(IE) || 8687 !VD->getType().getNonReferenceType()->isPointerType() || 8688 PrevCI == CI->Components.rend() || 8689 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8690 VarD->hasLocalStorage()) { 8691 CI->ReturnDevicePointer = true; 8692 Found = true; 8693 break; 8694 } 8695 } 8696 } 8697 if (Found) 8698 continue; 8699 } 8700 8701 // We didn't find any match in our map information - generate a zero 8702 // size array section - if the pointer is a struct member we defer this 8703 // action until the whole struct has been processed. 8704 if (isa<MemberExpr>(IE)) { 8705 // Insert the pointer into Info to be processed by 8706 // generateInfoForComponentList. Because it is a member pointer 8707 // without a pointee, no entry will be generated for it, therefore 8708 // we need to generate one after the whole struct has been processed. 8709 // Nonetheless, generateInfoForComponentList must be called to take 8710 // the pointer into account for the calculation of the range of the 8711 // partial struct. 
8712 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8713 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8714 nullptr); 8715 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8716 } else { 8717 llvm::Value *Ptr = 8718 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8719 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8720 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8721 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8722 UseDevicePtrCombinedInfo.Sizes.push_back( 8723 llvm::Constant::getNullValue(CGF.Int64Ty)); 8724 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8725 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8726 } 8727 } 8728 } 8729 8730 // Look at the use_device_addr clause information and mark the existing map 8731 // entries as such. If there is no map information for an entry in the 8732 // use_device_addr list, we create one with map type 'alloc' and zero size 8733 // section. It is the user fault if that was not mapped before. If there is 8734 // no map information and the pointer is a struct member, then we defer the 8735 // emission of that entry until the whole struct has been processed. 8736 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8737 for (const auto *Cl : Clauses) { 8738 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8739 if (!C) 8740 continue; 8741 for (const auto L : C->component_lists()) { 8742 assert(!std::get<1>(L).empty() && 8743 "Not expecting empty list of components!"); 8744 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8745 if (!Processed.insert(VD).second) 8746 continue; 8747 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8748 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8749 // If the first component is a member expression, we have to look into 8750 // 'this', which maps to null in the map of map information. 
Otherwise 8751 // look directly for the information. 8752 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8753 8754 // We potentially have map information for this declaration already. 8755 // Look for the first set of components that refer to it. 8756 if (It != Info.end()) { 8757 bool Found = false; 8758 for (auto &Data : It->second) { 8759 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8760 return MI.Components.back().getAssociatedDeclaration() == VD; 8761 }); 8762 // If we found a map entry, signal that the pointer has to be 8763 // returned and move on to the next declaration. 8764 if (CI != Data.end()) { 8765 CI->ReturnDevicePointer = true; 8766 Found = true; 8767 break; 8768 } 8769 } 8770 if (Found) 8771 continue; 8772 } 8773 8774 // We didn't find any match in our map information - generate a zero 8775 // size array section - if the pointer is a struct member we defer this 8776 // action until the whole struct has been processed. 8777 if (isa<MemberExpr>(IE)) { 8778 // Insert the pointer into Info to be processed by 8779 // generateInfoForComponentList. Because it is a member pointer 8780 // without a pointee, no entry will be generated for it, therefore 8781 // we need to generate one after the whole struct has been processed. 8782 // Nonetheless, generateInfoForComponentList must be called to take 8783 // the pointer into account for the calculation of the range of the 8784 // partial struct. 
8785 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8786 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8787 nullptr, nullptr, /*ForDeviceAddr=*/true); 8788 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8789 } else { 8790 llvm::Value *Ptr; 8791 if (IE->isGLValue()) 8792 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8793 else 8794 Ptr = CGF.EmitScalarExpr(IE); 8795 CombinedInfo.Exprs.push_back(VD); 8796 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8797 CombinedInfo.Pointers.push_back(Ptr); 8798 CombinedInfo.Sizes.push_back( 8799 llvm::Constant::getNullValue(CGF.Int64Ty)); 8800 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8801 CombinedInfo.Mappers.push_back(nullptr); 8802 } 8803 } 8804 } 8805 8806 for (const auto &Data : Info) { 8807 StructRangeInfoTy PartialStruct; 8808 // Temporary generated information. 8809 MapCombinedInfoTy CurInfo; 8810 const Decl *D = Data.first; 8811 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8812 for (const auto &M : Data.second) { 8813 for (const MapInfo &L : M) { 8814 assert(!L.Components.empty() && 8815 "Not expecting declaration with no component lists."); 8816 8817 // Remember the current base pointer index. 8818 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8819 CurInfo.NonContigInfo.IsNonContiguous = 8820 L.Components.back().isNonContiguous(); 8821 generateInfoForComponentList( 8822 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8823 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8824 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8825 8826 // If this entry relates with a device pointer, set the relevant 8827 // declaration and add the 'return pointer' flag. 
8828 if (L.ReturnDevicePointer) { 8829 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8830 "Unexpected number of mapped base pointers."); 8831 8832 const ValueDecl *RelevantVD = 8833 L.Components.back().getAssociatedDeclaration(); 8834 assert(RelevantVD && 8835 "No relevant declaration related with device pointer??"); 8836 8837 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8838 RelevantVD); 8839 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8840 } 8841 } 8842 } 8843 8844 // Append any pending zero-length pointers which are struct members and 8845 // used with use_device_ptr or use_device_addr. 8846 auto CI = DeferredInfo.find(Data.first); 8847 if (CI != DeferredInfo.end()) { 8848 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8849 llvm::Value *BasePtr; 8850 llvm::Value *Ptr; 8851 if (L.ForDeviceAddr) { 8852 if (L.IE->isGLValue()) 8853 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8854 else 8855 Ptr = this->CGF.EmitScalarExpr(L.IE); 8856 BasePtr = Ptr; 8857 // Entry is RETURN_PARAM. Also, set the placeholder value 8858 // MEMBER_OF=FFFF so that the entry is later updated with the 8859 // correct value of MEMBER_OF. 8860 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8861 } else { 8862 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8863 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8864 L.IE->getExprLoc()); 8865 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8866 // placeholder value MEMBER_OF=FFFF so that the entry is later 8867 // updated with the correct value of MEMBER_OF. 
8868 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8869 OMP_MAP_MEMBER_OF); 8870 } 8871 CurInfo.Exprs.push_back(L.VD); 8872 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8873 CurInfo.Pointers.push_back(Ptr); 8874 CurInfo.Sizes.push_back( 8875 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8876 CurInfo.Mappers.push_back(nullptr); 8877 } 8878 } 8879 // If there is an entry in PartialStruct it means we have a struct with 8880 // individual members mapped. Emit an extra combined entry. 8881 if (PartialStruct.Base.isValid()) { 8882 CurInfo.NonContigInfo.Dims.push_back(0); 8883 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8884 } 8885 8886 // We need to append the results of this capture to what we already 8887 // have. 8888 CombinedInfo.append(CurInfo); 8889 } 8890 // Append data for use_device_ptr clauses. 8891 CombinedInfo.append(UseDevicePtrCombinedInfo); 8892 } 8893 8894 public: 8895 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8896 : CurDir(&Dir), CGF(CGF) { 8897 // Extract firstprivate clause information. 8898 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8899 for (const auto *D : C->varlists()) 8900 FirstPrivateDecls.try_emplace( 8901 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8902 // Extract implicit firstprivates from uses_allocators clauses. 
8903 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8904 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8905 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8906 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8907 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8908 /*Implicit=*/true); 8909 else if (const auto *VD = dyn_cast<VarDecl>( 8910 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8911 ->getDecl())) 8912 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8913 } 8914 } 8915 // Extract device pointer clause information. 8916 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8917 for (auto L : C->component_lists()) 8918 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8919 // Extract map information. 8920 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8921 if (C->getMapType() != OMPC_MAP_to) 8922 continue; 8923 for (auto L : C->component_lists()) { 8924 const ValueDecl *VD = std::get<0>(L); 8925 const auto *RD = VD ? VD->getType() 8926 .getCanonicalType() 8927 .getNonReferenceType() 8928 ->getAsCXXRecordDecl() 8929 : nullptr; 8930 if (RD && RD->isLambda()) 8931 LambdasMap.try_emplace(std::get<0>(L), C); 8932 } 8933 } 8934 } 8935 8936 /// Constructor for the declare mapper directive. 8937 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8938 : CurDir(&Dir), CGF(CGF) {} 8939 8940 /// Generate code for the combined entry if we have a partially mapped struct 8941 /// and take care of the mapping flags of the arguments corresponding to 8942 /// individual struct members. 
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a member of anything and not an array
    // section needs no enclosing combined entry; bail out.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      // Both bounds are set to LB: the size computation below then yields
      // exactly one element starting at LB, i.e. the complete record.
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // (pushed above) is the one presented to the runtime as the parameter.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
9017 void generateAllInfo( 9018 MapCombinedInfoTy &CombinedInfo, 9019 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9020 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9021 assert(CurDir.is<const OMPExecutableDirective *>() && 9022 "Expect a executable directive"); 9023 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9024 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9025 } 9026 9027 /// Generate all the base pointers, section pointers, sizes, map types, and 9028 /// mappers for the extracted map clauses of user-defined mapper (all included 9029 /// in \a CombinedInfo). 9030 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9031 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9032 "Expect a declare mapper directive"); 9033 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9034 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9035 } 9036 9037 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambdas (captured by reference) need this treatment; anything else
    // is handled by the regular capture codegen.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if any, as an implicit member of the
    // lambda object.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember field-address -> lambda-address so member_of indices can be
      // fixed up later in adjustMemberOfForLambdaCaptures.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are of interest.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the pointer value itself with a zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. Only
      // entries with exactly the flag combination pushed by
      // generateInfoForLambdaCaptures are touched.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Gather every component list that refers to this declaration across all
    // map clauses of the directive, together with its map type, modifiers,
    // implicitness, mapper and the variable reference expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Entries with a 'present' modifier sort before others, and non-'alloc'
    // map types sort before 'alloc' ones (stable within ties).
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list only (each unordered pair is
      // examined once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the base outward (reverse order)
        // while they agree on expression kind and declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type so the field
      // order (Layout) can be used as the sorting key.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same parent -> field index,
            // different parents -> whichever field appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      // Size of the whole *this object, not of the pointer.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the pointee's
        // address, loaded through the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored innermost-first in NonContigInfo; write them
      // out reversed so the descriptor is outermost-first.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9515 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9516 if (!E) 9517 return nullptr; 9518 9519 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9520 if (const MemberExpr *ME = 9521 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9522 return ME->getMemberDecl(); 9523 return nullptr; 9524 } 9525 9526 /// Emit a string constant containing the names of the values mapped to the 9527 /// offloading runtime library. 9528 llvm::Constant * 9529 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9530 MappableExprsHandler::MappingExprInfo &MapExprs) { 9531 9532 uint32_t SrcLocStrSize; 9533 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9534 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9535 9536 SourceLocation Loc; 9537 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9538 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9539 Loc = VD->getLocation(); 9540 else 9541 Loc = MapExprs.getMapExpr()->getExprLoc(); 9542 } else { 9543 Loc = MapExprs.getMapDecl()->getLocation(); 9544 } 9545 9546 std::string ExprName; 9547 if (MapExprs.getMapExpr()) { 9548 PrintingPolicy P(CGF.getContext().getLangOpts()); 9549 llvm::raw_string_ostream OS(ExprName); 9550 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9551 OS.flush(); 9552 } else { 9553 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9554 } 9555 9556 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9557 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9558 PLoc.getLine(), PLoc.getColumn(), 9559 SrcLocStrSize); 9560 } 9561 9562 /// Emit the arrays used to pass the captures and map information to the 9563 /// offloading runtime library. If there is no map or capture information, 9564 /// return nullptr by reference. 
/// Emit the per-region offloading arrays (.offload_baseptrs, .offload_ptrs,
/// .offload_sizes, .offload_mappers, map-type and map-name constants) and
/// record them in \p Info. Sizes become a constant global when all of them
/// are compile-time constants, otherwise a stack array filled at runtime.
/// Non-contiguous descriptors are appended at the end when requested.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries holding one void* slot per captured pointer.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For a non-contiguous entry the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size and mapper slots for
    // each captured entity.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where device pointers were stored so use_device_ptr/addr
      // clauses can read them back.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only emitted when requested and there is
  // actually something to describe.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9750 static void emitOffloadingArraysArgument( 9751 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9752 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9753 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9754 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9755 const ArgumentsOptions &Options = ArgumentsOptions()) { 9756 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9757 "expected region end call to runtime only when end call is separate"); 9758 CodeGenModule &CGM = CGF.CGM; 9759 if (Info.NumberOfPtrs) { 9760 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9761 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9762 Info.BasePointersArray, 9763 /*Idx0=*/0, /*Idx1=*/0); 9764 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9765 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9766 Info.PointersArray, 9767 /*Idx0=*/0, 9768 /*Idx1=*/0); 9769 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9770 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9771 /*Idx0=*/0, /*Idx1=*/0); 9772 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9773 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9774 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9775 : Info.MapTypesArray, 9776 /*Idx0=*/0, 9777 /*Idx1=*/0); 9778 9779 // Only emit the mapper information arrays if debug information is 9780 // requested. 
9781 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9782 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9783 else 9784 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9785 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9786 Info.MapNamesArray, 9787 /*Idx0=*/0, 9788 /*Idx1=*/0); 9789 // If there is no user-defined mapper, set the mapper array to nullptr to 9790 // avoid an unnecessary data privatization 9791 if (!Info.HasMapper) 9792 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9793 else 9794 MappersArrayArg = 9795 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9796 } else { 9797 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9798 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9799 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9800 MapTypesArrayArg = 9801 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9802 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9803 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9804 } 9805 } 9806 9807 /// Check for inner distribute directive. 
9808 static const OMPExecutableDirective * 9809 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9810 const auto *CS = D.getInnermostCapturedStmt(); 9811 const auto *Body = 9812 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9813 const Stmt *ChildStmt = 9814 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9815 9816 if (const auto *NestedDir = 9817 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9818 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9819 switch (D.getDirectiveKind()) { 9820 case OMPD_target: 9821 if (isOpenMPDistributeDirective(DKind)) 9822 return NestedDir; 9823 if (DKind == OMPD_teams) { 9824 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9825 /*IgnoreCaptured=*/true); 9826 if (!Body) 9827 return nullptr; 9828 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9829 if (const auto *NND = 9830 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9831 DKind = NND->getDirectiveKind(); 9832 if (isOpenMPDistributeDirective(DKind)) 9833 return NND; 9834 } 9835 } 9836 return nullptr; 9837 case OMPD_target_teams: 9838 if (isOpenMPDistributeDirective(DKind)) 9839 return NestedDir; 9840 return nullptr; 9841 case OMPD_target_parallel: 9842 case OMPD_target_simd: 9843 case OMPD_target_parallel_for: 9844 case OMPD_target_parallel_for_simd: 9845 return nullptr; 9846 case OMPD_target_teams_distribute: 9847 case OMPD_target_teams_distribute_simd: 9848 case OMPD_target_teams_distribute_parallel_for: 9849 case OMPD_target_teams_distribute_parallel_for_simd: 9850 case OMPD_parallel: 9851 case OMPD_for: 9852 case OMPD_parallel_for: 9853 case OMPD_parallel_master: 9854 case OMPD_parallel_sections: 9855 case OMPD_for_simd: 9856 case OMPD_parallel_for_simd: 9857 case OMPD_cancel: 9858 case OMPD_cancellation_point: 9859 case OMPD_ordered: 9860 case OMPD_threadprivate: 9861 case OMPD_allocate: 9862 case OMPD_task: 9863 case OMPD_simd: 9864 case OMPD_tile: 9865 
case OMPD_unroll: 9866 case OMPD_sections: 9867 case OMPD_section: 9868 case OMPD_single: 9869 case OMPD_master: 9870 case OMPD_critical: 9871 case OMPD_taskyield: 9872 case OMPD_barrier: 9873 case OMPD_taskwait: 9874 case OMPD_taskgroup: 9875 case OMPD_atomic: 9876 case OMPD_flush: 9877 case OMPD_depobj: 9878 case OMPD_scan: 9879 case OMPD_teams: 9880 case OMPD_target_data: 9881 case OMPD_target_exit_data: 9882 case OMPD_target_enter_data: 9883 case OMPD_distribute: 9884 case OMPD_distribute_simd: 9885 case OMPD_distribute_parallel_for: 9886 case OMPD_distribute_parallel_for_simd: 9887 case OMPD_teams_distribute: 9888 case OMPD_teams_distribute_simd: 9889 case OMPD_teams_distribute_parallel_for: 9890 case OMPD_teams_distribute_parallel_for_simd: 9891 case OMPD_target_update: 9892 case OMPD_declare_simd: 9893 case OMPD_declare_variant: 9894 case OMPD_begin_declare_variant: 9895 case OMPD_end_declare_variant: 9896 case OMPD_declare_target: 9897 case OMPD_end_declare_target: 9898 case OMPD_declare_reduction: 9899 case OMPD_declare_mapper: 9900 case OMPD_taskloop: 9901 case OMPD_taskloop_simd: 9902 case OMPD_master_taskloop: 9903 case OMPD_master_taskloop_simd: 9904 case OMPD_parallel_master_taskloop: 9905 case OMPD_parallel_master_taskloop_simd: 9906 case OMPD_requires: 9907 case OMPD_metadirective: 9908 case OMPD_unknown: 9909 default: 9910 llvm_unreachable("Unexpected directive."); 9911 } 9912 } 9913 9914 return nullptr; 9915 } 9916 9917 /// Emit the user-defined mapper function. The code generation follows the 9918 /// pattern in the example below. 9919 /// \code 9920 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9921 /// void *base, void *begin, 9922 /// int64_t size, int64_t type, 9923 /// void *name = nullptr) { 9924 /// // Allocate space for an array section first or add a base/begin for 9925 /// // pointer dereference. 
/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///     !maptype.IsDelete)
///   __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                               size*sizeof(Ty), clearToFromMember(type));
/// // Map members.
/// for (unsigned i = 0; i < size; i++) {
///   // For each component specified by this mapper:
///   for (auto c : begin[i]->all_components) {
///     if (c.hasMapper())
///       (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                     c.arg_type, c.arg_name);
///     else
///       __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                   c.arg_begin, c.arg_size, c.arg_type,
///                                   c.arg_name);
///   }
/// }
/// // Delete the array section.
/// if (size > 1 && maptype.IsDelete)
///   __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                               size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each declare-mapper is lowered at most once; the result is cached in
  // UDMMap.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The six parameters mirror the runtime mapper signature shown in the
  // \code example above: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mapper name includes the mangled mapped type so that mappers for
  // different types never collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PtrPHI is the loop induction variable: the current element's address.
  // Its second incoming value (PtrNext) is added after the body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Component names are only materialized under debug info, mirroring the
    // map-names handling elsewhere in this file.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map-type values; the ToElseBB edge carries the
    // unmodified (tofrom) MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // When emitted on behalf of a function, also record the association so the
  // per-function mapper bookkeeping stays up to date.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to not this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Basic block / value names carry ".init" or ".del" so the two expansions
  // emitted for the same mapper are distinguishable in the IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization runs for array sections, or for PTR_AND_OBJ entries
    // whose base differs from begin, and only when the delete bit is clear.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only for array sections and only when the delete bit is
    // set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

// Return the (possibly cached) outlined function implementing the
// user-defined mapper \p D, emitting it on first request. Relies on
// emitUserDefinedMapper() populating UDMMap as a side effect.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

// Emit a call that pushes the trip count of the (possibly nested) loop
// associated with a target directive to the runtime, so the device can size
// its execution. Does nothing if no distribute-loop directive is found.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

// Emit the host-side launch sequence for a target region: fill the
// offloading argument arrays, call the __tgt_target* runtime entry point,
// and fall back to the host-outlined function if offloading fails or is
// unavailable.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the launch in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // These are filled in by TargetThenGen below before ThenGen runs; ThenGen
  // captures them by reference for that reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture: the outer task has its own copies of the captured vars.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, captured-record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the arrays to the enclosing scope so ThenGen (invoked next)
    // can pass them to the runtime call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

// Recursively walk \p S looking for target directives and emit their device
// functions. \p ParentName is the mangled name of the enclosing host
// function, used to build unique target-region entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch on the exact target directive kind; every kind that can carry
    // a device kernel has a dedicated emitter in CodeGenFunction.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives and
    // cannot reach here (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

// Returns true if \p VD carries a declare-target device_type that excludes
// the current compilation side (host vs. device), i.e. it must not be
// emitted here.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

// Decide whether codegen of \p GD should be skipped here (returns true) or
// proceed via normal host/device emission (returns false). On the device
// side this also scans the body for target regions as a side effect.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

// Decide whether codegen of a global variable should be skipped (returns
// true) or proceed normally (returns false); link/unified-memory variables
// are deferred for later emission.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

// Record \p VD in the offload entries table so the runtime can associate the
// host and device copies of a declare-target variable. \p Addr is the host
// address (may be rewritten or dropped for link/unified-memory entries).
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and we are the host.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // Link entries are registered through the generated "$link" pointer.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

// Route a global to the function or variable handler above.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

// Emit the declare-target variables whose emission was deferred by
// emitTargetGlobalVariable().
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

// Base-class hook: no adjustment needed for the generic runtime; only
// asserts the directive is target-based. Subclasses may override.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

// Record the effects of an 'omp requires' directive: unified shared memory
// and the default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if
(const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Map the clause's ordering kind onto the LLVM atomic ordering used as
      // the default for subsequent 'atomic' constructs.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

// Default atomic ordering as set by 'requires atomic_default_mem_order'.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

// Returns true and sets \p AS if \p VD has an 'omp allocate' attribute with
// a predefined allocator; all predefined allocators currently map to the
// default address space here.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

// RAII that temporarily disables automatic declare-target marking during
// device compilation; restores the previous state on destruction.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

// Returns true if \p GD should be emitted (or has already been handled);
// also records first-time emissions in AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

// Create the global constructor-like function that calls
// __tgt_register_requires with the flags collected from 'requires'
// directives, or return nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

// Emit a __kmpc_fork_teams call that launches \p OutlinedFn with the given
// captured variables for a 'teams' construct.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11145 CGF.CGM.Int32Ty, /* isSigned = */ true) 11146 : CGF.Builder.getInt32(0); 11147 11148 llvm::Value *ThreadLimitVal = 11149 ThreadLimit 11150 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11151 CGF.CGM.Int32Ty, /* isSigned = */ true) 11152 : CGF.Builder.getInt32(0); 11153 11154 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11155 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11156 ThreadLimitVal}; 11157 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11158 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11159 PushNumTeamsArgs); 11160 } 11161 11162 void CGOpenMPRuntime::emitTargetDataCalls( 11163 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11164 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11165 if (!CGF.HaveInsertPoint()) 11166 return; 11167 11168 // Action used to replace the default codegen action and turn privatization 11169 // off. 11170 PrePostActionTy NoPrivAction; 11171 11172 // Generate the code for the opening of the data environment. Capture all the 11173 // arguments of the runtime call by reference because they are used in the 11174 // closing of the region. 11175 auto &&BeginThenGen = [this, &D, Device, &Info, 11176 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11177 // Fill up the arrays with all the mapped variables. 11178 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11179 11180 // Get map clause information. 11181 MappableExprsHandler MEHandler(D, CGF); 11182 MEHandler.generateAllInfo(CombinedInfo); 11183 11184 // Fill up the arrays and create the arguments. 
11185 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11186 /*IsNonContiguous=*/true); 11187 11188 llvm::Value *BasePointersArrayArg = nullptr; 11189 llvm::Value *PointersArrayArg = nullptr; 11190 llvm::Value *SizesArrayArg = nullptr; 11191 llvm::Value *MapTypesArrayArg = nullptr; 11192 llvm::Value *MapNamesArrayArg = nullptr; 11193 llvm::Value *MappersArrayArg = nullptr; 11194 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11195 SizesArrayArg, MapTypesArrayArg, 11196 MapNamesArrayArg, MappersArrayArg, Info); 11197 11198 // Emit device ID if any. 11199 llvm::Value *DeviceID = nullptr; 11200 if (Device) { 11201 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11202 CGF.Int64Ty, /*isSigned=*/true); 11203 } else { 11204 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11205 } 11206 11207 // Emit the number of elements in the offloading arrays. 11208 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11209 // 11210 // Source location for the ident struct 11211 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11212 11213 llvm::Value *OffloadingArgs[] = {RTLoc, 11214 DeviceID, 11215 PointerNum, 11216 BasePointersArrayArg, 11217 PointersArrayArg, 11218 SizesArrayArg, 11219 MapTypesArrayArg, 11220 MapNamesArrayArg, 11221 MappersArrayArg}; 11222 CGF.EmitRuntimeCall( 11223 OMPBuilder.getOrCreateRuntimeFunction( 11224 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11225 OffloadingArgs); 11226 11227 // If device pointer privatization is required, emit the body of the region 11228 // here. It will have to be duplicated: with and without privatization. 11229 if (!Info.CaptureDeviceAddrMap.empty()) 11230 CodeGen(CGF); 11231 }; 11232 11233 // Generate code for the closing of the data region. 
11234 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11235 PrePostActionTy &) { 11236 assert(Info.isValid() && "Invalid data environment closing arguments."); 11237 11238 llvm::Value *BasePointersArrayArg = nullptr; 11239 llvm::Value *PointersArrayArg = nullptr; 11240 llvm::Value *SizesArrayArg = nullptr; 11241 llvm::Value *MapTypesArrayArg = nullptr; 11242 llvm::Value *MapNamesArrayArg = nullptr; 11243 llvm::Value *MappersArrayArg = nullptr; 11244 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11245 SizesArrayArg, MapTypesArrayArg, 11246 MapNamesArrayArg, MappersArrayArg, Info, 11247 {/*ForEndCall=*/true}); 11248 11249 // Emit device ID if any. 11250 llvm::Value *DeviceID = nullptr; 11251 if (Device) { 11252 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11253 CGF.Int64Ty, /*isSigned=*/true); 11254 } else { 11255 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11256 } 11257 11258 // Emit the number of elements in the offloading arrays. 11259 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11260 11261 // Source location for the ident struct 11262 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11263 11264 llvm::Value *OffloadingArgs[] = {RTLoc, 11265 DeviceID, 11266 PointerNum, 11267 BasePointersArrayArg, 11268 PointersArrayArg, 11269 SizesArrayArg, 11270 MapTypesArrayArg, 11271 MapNamesArrayArg, 11272 MappersArrayArg}; 11273 CGF.EmitRuntimeCall( 11274 OMPBuilder.getOrCreateRuntimeFunction( 11275 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11276 OffloadingArgs); 11277 }; 11278 11279 // If we need device pointer privatization, we need to emit the body of the 11280 // region with no privatization in the 'else' branch of the conditional. 11281 // Otherwise, we don't have to do anything. 
11282 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11283 PrePostActionTy &) { 11284 if (!Info.CaptureDeviceAddrMap.empty()) { 11285 CodeGen.setAction(NoPrivAction); 11286 CodeGen(CGF); 11287 } 11288 }; 11289 11290 // We don't have to do anything to close the region if the if clause evaluates 11291 // to false. 11292 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11293 11294 if (IfCond) { 11295 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11296 } else { 11297 RegionCodeGenTy RCG(BeginThenGen); 11298 RCG(CGF); 11299 } 11300 11301 // If we don't require privatization of device pointers, we emit the body in 11302 // between the runtime calls. This avoids duplicating the body code. 11303 if (Info.CaptureDeviceAddrMap.empty()) { 11304 CodeGen.setAction(NoPrivAction); 11305 CodeGen(CGF); 11306 } 11307 11308 if (IfCond) { 11309 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11310 } else { 11311 RegionCodeGenTy RCG(EndThenGen); 11312 RCG(CGF); 11313 } 11314 } 11315 11316 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11317 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11318 const Expr *Device) { 11319 if (!CGF.HaveInsertPoint()) 11320 return; 11321 11322 assert((isa<OMPTargetEnterDataDirective>(D) || 11323 isa<OMPTargetExitDataDirective>(D) || 11324 isa<OMPTargetUpdateDirective>(D)) && 11325 "Expecting either target enter, exit data, or update directives."); 11326 11327 CodeGenFunction::OMPTargetDataInfo InputInfo; 11328 llvm::Value *MapTypesArray = nullptr; 11329 llvm::Value *MapNamesArray = nullptr; 11330 // Generate the code for the opening of the data environment. 11331 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11332 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11333 // Emit device ID if any. 
11334 llvm::Value *DeviceID = nullptr; 11335 if (Device) { 11336 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11337 CGF.Int64Ty, /*isSigned=*/true); 11338 } else { 11339 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11340 } 11341 11342 // Emit the number of elements in the offloading arrays. 11343 llvm::Constant *PointerNum = 11344 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11345 11346 // Source location for the ident struct 11347 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11348 11349 llvm::Value *OffloadingArgs[] = {RTLoc, 11350 DeviceID, 11351 PointerNum, 11352 InputInfo.BasePointersArray.getPointer(), 11353 InputInfo.PointersArray.getPointer(), 11354 InputInfo.SizesArray.getPointer(), 11355 MapTypesArray, 11356 MapNamesArray, 11357 InputInfo.MappersArray.getPointer()}; 11358 11359 // Select the right runtime function call for each standalone 11360 // directive. 11361 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11362 RuntimeFunction RTLFn; 11363 switch (D.getDirectiveKind()) { 11364 case OMPD_target_enter_data: 11365 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11366 : OMPRTL___tgt_target_data_begin_mapper; 11367 break; 11368 case OMPD_target_exit_data: 11369 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11370 : OMPRTL___tgt_target_data_end_mapper; 11371 break; 11372 case OMPD_target_update: 11373 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11374 : OMPRTL___tgt_target_data_update_mapper; 11375 break; 11376 case OMPD_parallel: 11377 case OMPD_for: 11378 case OMPD_parallel_for: 11379 case OMPD_parallel_master: 11380 case OMPD_parallel_sections: 11381 case OMPD_for_simd: 11382 case OMPD_parallel_for_simd: 11383 case OMPD_cancel: 11384 case OMPD_cancellation_point: 11385 case OMPD_ordered: 11386 case OMPD_threadprivate: 11387 case OMPD_allocate: 11388 case OMPD_task: 11389 case OMPD_simd: 11390 case OMPD_tile: 11391 case OMPD_unroll: 11392 case OMPD_sections: 11393 case OMPD_section: 11394 case OMPD_single: 11395 case OMPD_master: 11396 case OMPD_critical: 11397 case OMPD_taskyield: 11398 case OMPD_barrier: 11399 case OMPD_taskwait: 11400 case OMPD_taskgroup: 11401 case OMPD_atomic: 11402 case OMPD_flush: 11403 case OMPD_depobj: 11404 case OMPD_scan: 11405 case OMPD_teams: 11406 case OMPD_target_data: 11407 case OMPD_distribute: 11408 case OMPD_distribute_simd: 11409 case OMPD_distribute_parallel_for: 11410 case OMPD_distribute_parallel_for_simd: 11411 case OMPD_teams_distribute: 11412 case OMPD_teams_distribute_simd: 11413 case OMPD_teams_distribute_parallel_for: 11414 case OMPD_teams_distribute_parallel_for_simd: 11415 case OMPD_declare_simd: 11416 case OMPD_declare_variant: 11417 case OMPD_begin_declare_variant: 11418 case OMPD_end_declare_variant: 11419 case OMPD_declare_target: 11420 case OMPD_end_declare_target: 11421 case OMPD_declare_reduction: 11422 case OMPD_declare_mapper: 11423 case OMPD_taskloop: 11424 case OMPD_taskloop_simd: 11425 case OMPD_master_taskloop: 11426 case OMPD_master_taskloop_simd: 11427 case OMPD_parallel_master_taskloop: 11428 case OMPD_parallel_master_taskloop_simd: 11429 case OMPD_target: 11430 case OMPD_target_simd: 11431 case OMPD_target_teams_distribute: 11432 case OMPD_target_teams_distribute_simd: 11433 case OMPD_target_teams_distribute_parallel_for: 11434 case OMPD_target_teams_distribute_parallel_for_simd: 11435 case 
OMPD_target_teams: 11436 case OMPD_target_parallel: 11437 case OMPD_target_parallel_for: 11438 case OMPD_target_parallel_for_simd: 11439 case OMPD_requires: 11440 case OMPD_metadirective: 11441 case OMPD_unknown: 11442 default: 11443 llvm_unreachable("Unexpected standalone target data directive."); 11444 break; 11445 } 11446 CGF.EmitRuntimeCall( 11447 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11448 OffloadingArgs); 11449 }; 11450 11451 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11452 &MapNamesArray](CodeGenFunction &CGF, 11453 PrePostActionTy &) { 11454 // Fill up the arrays with all the mapped variables. 11455 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11456 11457 // Get map clause information. 11458 MappableExprsHandler MEHandler(D, CGF); 11459 MEHandler.generateAllInfo(CombinedInfo); 11460 11461 TargetDataInfo Info; 11462 // Fill up the arrays and create the arguments. 11463 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11464 /*IsNonContiguous=*/true); 11465 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11466 D.hasClausesOfKind<OMPNowaitClause>(); 11467 emitOffloadingArraysArgument( 11468 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11469 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11470 {/*ForEndTask=*/false}); 11471 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11472 InputInfo.BasePointersArray = 11473 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11474 InputInfo.PointersArray = 11475 Address(Info.PointersArray, CGM.getPointerAlign()); 11476 InputInfo.SizesArray = 11477 Address(Info.SizesArray, CGM.getPointerAlign()); 11478 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11479 MapTypesArray = Info.MapTypesArray; 11480 MapNamesArray = Info.MapNamesArray; 11481 if (RequiresOuterTask) 11482 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11483 else 11484 
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11485 }; 11486 11487 if (IfCond) { 11488 emitIfClause(CGF, IfCond, TargetThenGen, 11489 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11490 } else { 11491 RegionCodeGenTy ThenRCG(TargetThenGen); 11492 ThenRCG(CGF); 11493 } 11494 } 11495 11496 namespace { 11497 /// Kind of parameter in a function with 'declare simd' directive. 11498 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11499 /// Attribute set of the parameter. 11500 struct ParamAttrTy { 11501 ParamKindTy Kind = Vector; 11502 llvm::APSInt StrideOrArg; 11503 llvm::APSInt Alignment; 11504 }; 11505 } // namespace 11506 11507 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11508 ArrayRef<ParamAttrTy> ParamAttrs) { 11509 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11510 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11511 // of that clause. The VLEN value must be power of 2. 11512 // In other case the notion of the function`s "characteristic data type" (CDT) 11513 // is used to compute the vector length. 11514 // CDT is defined in the following order: 11515 // a) For non-void function, the CDT is the return type. 11516 // b) If the function has any non-uniform, non-linear parameters, then the 11517 // CDT is the type of the first such parameter. 11518 // c) If the CDT determined by a) or b) above is struct, union, or class 11519 // type which is pass-by-value (except for the type that maps to the 11520 // built-in complex data type), the characteristic data type is int. 11521 // d) If none of the above three cases is applicable, the CDT is int. 11522 // The VLEN is then determined based on the CDT and the size of vector 11523 // register of that ISA for which current vector version is generated. 
The 11524 // VLEN is computed using the formula below: 11525 // VLEN = sizeof(vector_register) / sizeof(CDT), 11526 // where vector register size specified in section 3.2.1 Registers and the 11527 // Stack Frame of original AMD64 ABI document. 11528 QualType RetType = FD->getReturnType(); 11529 if (RetType.isNull()) 11530 return 0; 11531 ASTContext &C = FD->getASTContext(); 11532 QualType CDT; 11533 if (!RetType.isNull() && !RetType->isVoidType()) { 11534 CDT = RetType; 11535 } else { 11536 unsigned Offset = 0; 11537 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11538 if (ParamAttrs[Offset].Kind == Vector) 11539 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11540 ++Offset; 11541 } 11542 if (CDT.isNull()) { 11543 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11544 if (ParamAttrs[I + Offset].Kind == Vector) { 11545 CDT = FD->getParamDecl(I)->getType(); 11546 break; 11547 } 11548 } 11549 } 11550 } 11551 if (CDT.isNull()) 11552 CDT = C.IntTy; 11553 CDT = CDT->getCanonicalTypeUnqualified(); 11554 if (CDT->isRecordType() || CDT->isUnionType()) 11555 CDT = C.IntTy; 11556 return C.getTypeSize(CDT); 11557 } 11558 11559 static void 11560 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11561 const llvm::APSInt &VLENVal, 11562 ArrayRef<ParamAttrTy> ParamAttrs, 11563 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11564 struct ISADataTy { 11565 char ISA; 11566 unsigned VecRegSize; 11567 }; 11568 ISADataTy ISAData[] = { 11569 { 11570 'b', 128 11571 }, // SSE 11572 { 11573 'c', 256 11574 }, // AVX 11575 { 11576 'd', 256 11577 }, // AVX2 11578 { 11579 'e', 512 11580 }, // AVX512 11581 }; 11582 llvm::SmallVector<char, 2> Masked; 11583 switch (State) { 11584 case OMPDeclareSimdDeclAttr::BS_Undefined: 11585 Masked.push_back('N'); 11586 Masked.push_back('M'); 11587 break; 11588 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11589 Masked.push_back('N'); 11590 break; 11591 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11592 
Masked.push_back('M'); 11593 break; 11594 } 11595 for (char Mask : Masked) { 11596 for (const ISADataTy &Data : ISAData) { 11597 SmallString<256> Buffer; 11598 llvm::raw_svector_ostream Out(Buffer); 11599 Out << "_ZGV" << Data.ISA << Mask; 11600 if (!VLENVal) { 11601 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11602 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11603 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11604 } else { 11605 Out << VLENVal; 11606 } 11607 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11608 switch (ParamAttr.Kind){ 11609 case LinearWithVarStride: 11610 Out << 's' << ParamAttr.StrideOrArg; 11611 break; 11612 case Linear: 11613 Out << 'l'; 11614 if (ParamAttr.StrideOrArg != 1) 11615 Out << ParamAttr.StrideOrArg; 11616 break; 11617 case Uniform: 11618 Out << 'u'; 11619 break; 11620 case Vector: 11621 Out << 'v'; 11622 break; 11623 } 11624 if (!!ParamAttr.Alignment) 11625 Out << 'a' << ParamAttr.Alignment; 11626 } 11627 Out << '_' << Fn->getName(); 11628 Fn->addFnAttr(Out.str()); 11629 } 11630 } 11631 } 11632 11633 // This are the Functions that are needed to mangle the name of the 11634 // vector functions generated by the compiler, according to the rules 11635 // defined in the "Vector Function ABI specifications for AArch64", 11636 // available at 11637 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11638 11639 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11640 /// 11641 /// TODO: Need to implement the behavior for reference marked with a 11642 /// var or no linear modifiers (1.b in the section). For this, we 11643 /// need to extend ParamKindTy to support the linear modifiers. 
11644 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11645 QT = QT.getCanonicalType(); 11646 11647 if (QT->isVoidType()) 11648 return false; 11649 11650 if (Kind == ParamKindTy::Uniform) 11651 return false; 11652 11653 if (Kind == ParamKindTy::Linear) 11654 return false; 11655 11656 // TODO: Handle linear references with modifiers 11657 11658 if (Kind == ParamKindTy::LinearWithVarStride) 11659 return false; 11660 11661 return true; 11662 } 11663 11664 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11665 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11666 QT = QT.getCanonicalType(); 11667 unsigned Size = C.getTypeSize(QT); 11668 11669 // Only scalars and complex within 16 bytes wide set PVB to true. 11670 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11671 return false; 11672 11673 if (QT->isFloatingType()) 11674 return true; 11675 11676 if (QT->isIntegerType()) 11677 return true; 11678 11679 if (QT->isPointerType()) 11680 return true; 11681 11682 // TODO: Add support for complex types (section 3.1.2, item 2). 11683 11684 return false; 11685 } 11686 11687 /// Computes the lane size (LS) of a return type or of an input parameter, 11688 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11689 /// TODO: Add support for references, section 3.2.1, item 1. 11690 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11691 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11692 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11693 if (getAArch64PBV(PTy, C)) 11694 return C.getTypeSize(PTy); 11695 } 11696 if (getAArch64PBV(QT, C)) 11697 return C.getTypeSize(QT); 11698 11699 return C.getTypeSize(C.getUIntPtrType()); 11700 } 11701 11702 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11703 // signature of the scalar function, as defined in 3.2.2 of the 11704 // AAVFABI. 
11705 static std::tuple<unsigned, unsigned, bool> 11706 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11707 QualType RetType = FD->getReturnType().getCanonicalType(); 11708 11709 ASTContext &C = FD->getASTContext(); 11710 11711 bool OutputBecomesInput = false; 11712 11713 llvm::SmallVector<unsigned, 8> Sizes; 11714 if (!RetType->isVoidType()) { 11715 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11716 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11717 OutputBecomesInput = true; 11718 } 11719 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11720 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11721 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11722 } 11723 11724 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11725 // The LS of a function parameter / return value can only be a power 11726 // of 2, starting from 8 bits, up to 128. 11727 assert(llvm::all_of(Sizes, 11728 [](unsigned Size) { 11729 return Size == 8 || Size == 16 || Size == 32 || 11730 Size == 64 || Size == 128; 11731 }) && 11732 "Invalid size"); 11733 11734 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11735 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11736 OutputBecomesInput); 11737 } 11738 11739 /// Mangle the parameter part of the vector function name according to 11740 /// their OpenMP classification. The mangling function is defined in 11741 /// section 3.5 of the AAVFABI. 11742 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11743 SmallString<256> Buffer; 11744 llvm::raw_svector_ostream Out(Buffer); 11745 for (const auto &ParamAttr : ParamAttrs) { 11746 switch (ParamAttr.Kind) { 11747 case LinearWithVarStride: 11748 Out << "ls" << ParamAttr.StrideOrArg; 11749 break; 11750 case Linear: 11751 Out << 'l'; 11752 // Don't print the step value if it is not present or if it is 11753 // equal to 1. 
11754 if (ParamAttr.StrideOrArg != 1) 11755 Out << ParamAttr.StrideOrArg; 11756 break; 11757 case Uniform: 11758 Out << 'u'; 11759 break; 11760 case Vector: 11761 Out << 'v'; 11762 break; 11763 } 11764 11765 if (!!ParamAttr.Alignment) 11766 Out << 'a' << ParamAttr.Alignment; 11767 } 11768 11769 return std::string(Out.str()); 11770 } 11771 11772 // Function used to add the attribute. The parameter `VLEN` is 11773 // templated to allow the use of "x" when targeting scalable functions 11774 // for SVE. 11775 template <typename T> 11776 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11777 char ISA, StringRef ParSeq, 11778 StringRef MangledName, bool OutputBecomesInput, 11779 llvm::Function *Fn) { 11780 SmallString<256> Buffer; 11781 llvm::raw_svector_ostream Out(Buffer); 11782 Out << Prefix << ISA << LMask << VLEN; 11783 if (OutputBecomesInput) 11784 Out << "v"; 11785 Out << ParSeq << "_" << MangledName; 11786 Fn->addFnAttr(Out.str()); 11787 } 11788 11789 // Helper function to generate the Advanced SIMD names depending on 11790 // the value of the NDS when simdlen is not present. 
11791 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11792 StringRef Prefix, char ISA, 11793 StringRef ParSeq, StringRef MangledName, 11794 bool OutputBecomesInput, 11795 llvm::Function *Fn) { 11796 switch (NDS) { 11797 case 8: 11798 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11799 OutputBecomesInput, Fn); 11800 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11801 OutputBecomesInput, Fn); 11802 break; 11803 case 16: 11804 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11805 OutputBecomesInput, Fn); 11806 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 break; 11809 case 32: 11810 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11811 OutputBecomesInput, Fn); 11812 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11813 OutputBecomesInput, Fn); 11814 break; 11815 case 64: 11816 case 128: 11817 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11818 OutputBecomesInput, Fn); 11819 break; 11820 default: 11821 llvm_unreachable("Scalar type is too wide."); 11822 } 11823 } 11824 11825 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11826 static void emitAArch64DeclareSimdFunction( 11827 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11828 ArrayRef<ParamAttrTy> ParamAttrs, 11829 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11830 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11831 11832 // Get basic data for building the vector signature. 11833 const auto Data = getNDSWDS(FD, ParamAttrs); 11834 const unsigned NDS = std::get<0>(Data); 11835 const unsigned WDS = std::get<1>(Data); 11836 const bool OutputBecomesInput = std::get<2>(Data); 11837 11838 // Check the values provided via `simdlen` by the user. 11839 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11840 if (UserVLEN == 1) { 11841 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11842 DiagnosticsEngine::Warning, 11843 "The clause simdlen(1) has no effect when targeting aarch64."); 11844 CGM.getDiags().Report(SLoc, DiagID); 11845 return; 11846 } 11847 11848 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11849 // Advanced SIMD output. 11850 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11851 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11852 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11853 "power of 2 when targeting Advanced SIMD."); 11854 CGM.getDiags().Report(SLoc, DiagID); 11855 return; 11856 } 11857 11858 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11859 // limits. 11860 if (ISA == 's' && UserVLEN != 0) { 11861 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11862 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11863 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11864 "lanes in the architectural constraints " 11865 "for SVE (min is 128-bit, max is " 11866 "2048-bit, by steps of 128-bit)"); 11867 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11868 return; 11869 } 11870 } 11871 11872 // Sort out parameter sequence. 11873 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11874 StringRef Prefix = "_ZGV"; 11875 // Generate simdlen from user input (if any). 11876 if (UserVLEN) { 11877 if (ISA == 's') { 11878 // SVE generates only a masked function. 11879 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11880 OutputBecomesInput, Fn); 11881 } else { 11882 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11883 // Advanced SIMD generates one or two functions, depending on 11884 // the `[not]inbranch` clause. 
      // A user-provided simdlen on Advanced SIMD emits the unmasked ("N"),
      // masked ("M"), or both variants depending on the branch state.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Emit '#pragma omp declare simd' vector variants for \p Fn.
///
/// Walks the redeclaration chain of \p FD and, for every
/// OMPDeclareSimdDeclAttr found, decodes its uniform/aligned/linear clauses
/// into per-parameter ParamAttrTy records, evaluates the simdlen and branch
/// state, and dispatches to the x86 or AArch64 mangling helper depending on
/// the target triple. The LLVM function itself is not modified beyond the
/// attributes/names added by those helpers.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Use the most recent redeclaration so the chain walk below sees every
  // attribute attached to any redeclaration of the function.
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For methods, slot 0 is reserved for the implicit 'this' parameter,
  // keyed by the function decl itself.
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One attribute slot per declared parameter (plus 'this' for methods).
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // 'alignments()' parallels 'aligneds()'; a null entry means no
      // explicit alignment, so the target's default SIMD alignment is used.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // 'steps()' and 'modifiers()' parallel 'linears()'.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          // NOTE(review): dyn_cast on the (possibly sugared) QualType will not
          // see through typedefs of pointer types; getAs<PointerType>() would.
          // Confirm whether sugared linear pointer params can reach here.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: it must name another parameter; encode the
            // position of that parameter as the variable stride.
            // NOTE(review): cast<> asserts on mismatch and never yields null,
            // so these 'if's are always taken when reached; dyn_cast<> may
            // have been intended -- confirm against Sema's invariants.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Emit both an SVE and an Advanced SIMD set of names when the
        // respective target features are available.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
/// EH-scope cleanup that emits the stored runtime call (the doacross
/// finalization, ident_t* + gtid) when the enclosing scope is unwound.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments captured for the finalization call.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit initialization for a doacross loop nest: builds an on-stack array of
/// kmp_dim descriptors (one per loop, upper bound and stride filled in),
/// calls __kmpc_doacross_init, and pushes a cleanup that calls
/// __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record was already built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-init covers 'lo' (and any field not explicitly stored below).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit a doacross 'ordered depend' construct: materializes the loop-counter
/// vector in a temporary kmp_int64 array and calls __kmpc_doacross_post for
/// 'source' or __kmpc_doacross_wait for 'sink'.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at an artificial debug location derived from
/// \p Loc, using the nounwind form when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to an outlined region function; thin forwarding wrapper that
/// target-specific runtimes may override.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a declare-target function body has been emitted; used by the
/// runtime's bookkeeping via HasEmittedDeclareTargetRegion.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Default implementation: the native parameter's own local address is used
/// directly (no translation between native and target parameter).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the address to use for local variable \p VD, taking into account
/// untied-task local storage and the 'omp allocate' attribute. For an
/// allocatable decl this emits __kmpc_alloc and pushes a cleanup that emits
/// the matching __kmpc_free.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up storage recorded for this var in the enclosing untied task,
  // if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, also publish the allocated pointer into the
    // task-local slot so it survives task switches.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Emits __kmpc_free(gtid, addr, allocator) on scope exit; the source
    // location is carried as a raw encoding so the cleanup stays trivially
    // copyable.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Return true if \p VD has task-local storage registered for the untied
/// task enclosing the current function.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// Scoped registration of the decls named in 'nontemporal' clauses of \p S:
/// collects each referenced decl (variable or member of the current class)
/// into a set pushed on NontemporalDeclsStack.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  // Pop only if the constructor pushed a set.
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// Scoped registration of the local-variable address map for an untied task
/// body: records the current function on FunctionToUntiedTaskStackMap and
/// pushes \p LocalVars on UntiedLocalVarsStack.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  // Pop only if the constructor pushed a map.
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Return true if \p VD is registered as nontemporal in any active scope.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

/// Collect, into \p NeedToAddForLPCsAsDisabled, the decls for which
/// lastprivate-conditional analysis must be disabled inside \p S: captured
/// vars of target/task regions plus scalar vars named in privatizing clauses
/// that match an enabled entry on LastprivateConditionalStack.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // NOTE(review): the five clause loops below are identical except for the
  // clause kind; a generic lambda helper would remove the duplication.
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        // Only report decls whose innermost matching entry is enabled.
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Push-mode constructor: if \p S has a 'lastprivate(conditional:)' clause
/// (OpenMP >= 5.0), registers each listed var with a unique "pl_cond" name
/// on LastprivateConditionalStack, together with the loop IV lvalue.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Disable-mode constructor: pushes a 'Disabled' entry listing decls for
/// which lastprivate-conditional analysis must be suppressed inside \p S.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Delegates to the disable-mode constructor above.
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of entry the constructor pushed (if any).
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Create (or reuse) the { value, Fired } wrapper record for lastprivate
/// conditional var \p VD in the current function, reset its Fired flag to 0,
/// and return the address of the value field to be used as the private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the implicit record and a stack
    // temporary for it.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv_a.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Data captured for the innermost matching reference found.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): 'Loc' appears unused in this class body -- confirm.
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-to-outermost; a Disabled entry shadows outer ones.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this->x') can be registered.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children, skipping rvalue expressions (only glvalue
    // references can denote the variable being written).
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emit the conditional-lastprivate update: inside a named critical region
/// (keyed by \p UniqueDeclName), compare the global last-updated IV against
/// the current IV and, if not greater, copy the private value into the
/// module-level "last value" global.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS writes a registered lastprivate-conditional variable, emit the
/// bookkeeping: set the Fired flag atomically when the registration belongs
/// to an outer function (inner parallel region), otherwise emit the guarded
/// last-value update in place.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// For each lastprivate-conditional var registered for the current function
/// and captured by \p D (and not in \p IgnoredDecls), test its Fired flag
/// and, when set, emit the guarded last-value update.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost enabled registration; it must belong to this function.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
12811 Address Addr = CGF.GetAddrOfLocalVar(VD); 12812 LValue LVal; 12813 if (VD->getType()->isReferenceType()) 12814 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12815 AlignmentSource::Decl); 12816 else 12817 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12818 AlignmentSource::Decl); 12819 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12820 D.getBeginLoc()); 12821 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12822 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12823 // } 12824 } 12825 } 12826 12827 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12828 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12829 SourceLocation Loc) { 12830 if (CGF.getLangOpts().OpenMP < 50) 12831 return; 12832 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12833 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12834 "Unknown lastprivate conditional variable."); 12835 StringRef UniqueName = It->second; 12836 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12837 // The variable was not updated in the region - exit. 
12838 if (!GV) 12839 return; 12840 LValue LPLVal = CGF.MakeAddrLValue( 12841 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12842 PrivLVal.getType().getNonReferenceType()); 12843 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12844 CGF.EmitStoreOfScalar(Res, PrivLVal); 12845 } 12846 12847 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12848 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12849 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12850 llvm_unreachable("Not supported in SIMD-only mode"); 12851 } 12852 12853 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12854 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12855 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12856 llvm_unreachable("Not supported in SIMD-only mode"); 12857 } 12858 12859 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12860 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12861 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12862 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12863 bool Tied, unsigned &NumberOfParts) { 12864 llvm_unreachable("Not supported in SIMD-only mode"); 12865 } 12866 12867 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12868 SourceLocation Loc, 12869 llvm::Function *OutlinedFn, 12870 ArrayRef<llvm::Value *> CapturedVars, 12871 const Expr *IfCond, 12872 llvm::Value *NumThreads) { 12873 llvm_unreachable("Not supported in SIMD-only mode"); 12874 } 12875 12876 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12877 CodeGenFunction &CGF, StringRef CriticalName, 12878 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12879 const Expr *Hint) { 12880 llvm_unreachable("Not supported in SIMD-only mode"); 12881 } 12882 12883 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12884 const RegionCodeGenTy &MasterOpGen, 12885 SourceLocation Loc) { 12886 
llvm_unreachable("Not supported in SIMD-only mode"); 12887 } 12888 12889 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12890 const RegionCodeGenTy &MasterOpGen, 12891 SourceLocation Loc, 12892 const Expr *Filter) { 12893 llvm_unreachable("Not supported in SIMD-only mode"); 12894 } 12895 12896 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12897 SourceLocation Loc) { 12898 llvm_unreachable("Not supported in SIMD-only mode"); 12899 } 12900 12901 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12902 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12903 SourceLocation Loc) { 12904 llvm_unreachable("Not supported in SIMD-only mode"); 12905 } 12906 12907 void CGOpenMPSIMDRuntime::emitSingleRegion( 12908 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12909 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12910 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12911 ArrayRef<const Expr *> AssignmentOps) { 12912 llvm_unreachable("Not supported in SIMD-only mode"); 12913 } 12914 12915 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12916 const RegionCodeGenTy &OrderedOpGen, 12917 SourceLocation Loc, 12918 bool IsThreads) { 12919 llvm_unreachable("Not supported in SIMD-only mode"); 12920 } 12921 12922 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12923 SourceLocation Loc, 12924 OpenMPDirectiveKind Kind, 12925 bool EmitChecks, 12926 bool ForceSimpleCall) { 12927 llvm_unreachable("Not supported in SIMD-only mode"); 12928 } 12929 12930 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12931 CodeGenFunction &CGF, SourceLocation Loc, 12932 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12933 bool Ordered, const DispatchRTInput &DispatchValues) { 12934 llvm_unreachable("Not supported in SIMD-only mode"); 12935 } 12936 12937 void CGOpenMPSIMDRuntime::emitForStaticInit( 12938 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12939 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12940 llvm_unreachable("Not supported in SIMD-only mode"); 12941 } 12942 12943 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12944 CodeGenFunction &CGF, SourceLocation Loc, 12945 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12946 llvm_unreachable("Not supported in SIMD-only mode"); 12947 } 12948 12949 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12950 SourceLocation Loc, 12951 unsigned IVSize, 12952 bool IVSigned) { 12953 llvm_unreachable("Not supported in SIMD-only mode"); 12954 } 12955 12956 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12957 SourceLocation Loc, 12958 OpenMPDirectiveKind DKind) { 12959 llvm_unreachable("Not supported in SIMD-only mode"); 12960 } 12961 12962 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12963 SourceLocation Loc, 12964 unsigned IVSize, bool IVSigned, 12965 Address IL, Address LB, 12966 Address UB, Address ST) { 12967 llvm_unreachable("Not supported in SIMD-only mode"); 12968 } 12969 12970 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12971 llvm::Value *NumThreads, 12972 SourceLocation Loc) { 12973 llvm_unreachable("Not supported in SIMD-only mode"); 12974 } 12975 12976 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12977 ProcBindKind ProcBind, 12978 SourceLocation Loc) { 12979 llvm_unreachable("Not supported in SIMD-only mode"); 12980 } 12981 12982 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12983 const VarDecl *VD, 12984 Address VDAddr, 12985 SourceLocation Loc) { 12986 llvm_unreachable("Not supported in SIMD-only mode"); 12987 } 12988 12989 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12990 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12991 CodeGenFunction *CGF) { 12992 llvm_unreachable("Not supported in SIMD-only mode"); 
12993 } 12994 12995 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12996 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12997 llvm_unreachable("Not supported in SIMD-only mode"); 12998 } 12999 13000 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13001 ArrayRef<const Expr *> Vars, 13002 SourceLocation Loc, 13003 llvm::AtomicOrdering AO) { 13004 llvm_unreachable("Not supported in SIMD-only mode"); 13005 } 13006 13007 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13008 const OMPExecutableDirective &D, 13009 llvm::Function *TaskFunction, 13010 QualType SharedsTy, Address Shareds, 13011 const Expr *IfCond, 13012 const OMPTaskDataTy &Data) { 13013 llvm_unreachable("Not supported in SIMD-only mode"); 13014 } 13015 13016 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13017 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13018 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13019 const Expr *IfCond, const OMPTaskDataTy &Data) { 13020 llvm_unreachable("Not supported in SIMD-only mode"); 13021 } 13022 13023 void CGOpenMPSIMDRuntime::emitReduction( 13024 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13025 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13026 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13027 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13028 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13029 ReductionOps, Options); 13030 } 13031 13032 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13033 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13034 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13035 llvm_unreachable("Not supported in SIMD-only mode"); 13036 } 13037 13038 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13039 SourceLocation Loc, 13040 bool 
IsWorksharingReduction) { 13041 llvm_unreachable("Not supported in SIMD-only mode"); 13042 } 13043 13044 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13045 SourceLocation Loc, 13046 ReductionCodeGen &RCG, 13047 unsigned N) { 13048 llvm_unreachable("Not supported in SIMD-only mode"); 13049 } 13050 13051 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13052 SourceLocation Loc, 13053 llvm::Value *ReductionsPtr, 13054 LValue SharedLVal) { 13055 llvm_unreachable("Not supported in SIMD-only mode"); 13056 } 13057 13058 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13059 SourceLocation Loc, 13060 const OMPTaskDataTy &Data) { 13061 llvm_unreachable("Not supported in SIMD-only mode"); 13062 } 13063 13064 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13065 CodeGenFunction &CGF, SourceLocation Loc, 13066 OpenMPDirectiveKind CancelRegion) { 13067 llvm_unreachable("Not supported in SIMD-only mode"); 13068 } 13069 13070 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13071 SourceLocation Loc, const Expr *IfCond, 13072 OpenMPDirectiveKind CancelRegion) { 13073 llvm_unreachable("Not supported in SIMD-only mode"); 13074 } 13075 13076 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13077 const OMPExecutableDirective &D, StringRef ParentName, 13078 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13079 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13080 llvm_unreachable("Not supported in SIMD-only mode"); 13081 } 13082 13083 void CGOpenMPSIMDRuntime::emitTargetCall( 13084 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13085 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13086 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13087 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13088 const OMPLoopDirective &D)> 13089 SizeEmitter) { 13090 llvm_unreachable("Not supported in SIMD-only mode"); 13091 } 13092 
// SIMD-only mode: target/teams/doacross constructs require the OpenMP
// runtime, so these entry points must never be reached.

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Report that the global is not handled as a target global; presumably this
// lets ordinary (host) codegen emit it — confirm against the caller.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}