1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "TargetInfo.h" 19 #include "clang/AST/APValue.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/AST/OpenMPClause.h" 23 #include "clang/AST/StmtOpenMP.h" 24 #include "clang/AST/StmtVisitor.h" 25 #include "clang/Basic/BitmaskEnum.h" 26 #include "clang/Basic/FileManager.h" 27 #include "clang/Basic/OpenMPKinds.h" 28 #include "clang/Basic/SourceManager.h" 29 #include "clang/CodeGen/ConstantInitBuilder.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/SetOperations.h" 32 #include "llvm/ADT/SmallBitVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/Bitcode/BitcodeReader.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/DerivedTypes.h" 37 #include "llvm/IR/GlobalValue.h" 38 #include "llvm/IR/InstrTypes.h" 39 #include "llvm/IR/Value.h" 40 #include "llvm/Support/AtomicOrdering.h" 41 #include "llvm/Support/Format.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <cassert> 44 #include <numeric> 45 46 using namespace clang; 47 using namespace CodeGen; 48 using namespace llvm::omp; 49 50 namespace { 51 /// Base class for handling code generation inside OpenMP regions. 52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 53 public: 54 /// Kinds of OpenMP regions used in codegen. 
55 enum CGOpenMPRegionKind { 56 /// Region with outlined function for standalone 'parallel' 57 /// directive. 58 ParallelOutlinedRegion, 59 /// Region with outlined function for standalone 'task' directive. 60 TaskOutlinedRegion, 61 /// Region for constructs that do not require function outlining, 62 /// like 'for', 'sections', 'atomic' etc. directives. 63 InlinedRegion, 64 /// Region with outlined function for standalone 'target' directive. 65 TargetRegion, 66 }; 67 68 CGOpenMPRegionInfo(const CapturedStmt &CS, 69 const CGOpenMPRegionKind RegionKind, 70 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 71 bool HasCancel) 72 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 73 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 74 75 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 76 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 77 bool HasCancel) 78 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 79 Kind(Kind), HasCancel(HasCancel) {} 80 81 /// Get a variable or parameter for storing global thread id 82 /// inside OpenMP construct. 83 virtual const VarDecl *getThreadIDVariable() const = 0; 84 85 /// Emit the captured statement body. 86 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 87 88 /// Get an LValue for the current ThreadID variable. 89 /// \return LValue for thread id variable. This LValue always has type int32*. 
90 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 91 92 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 93 94 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 95 96 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 97 98 bool hasCancel() const { return HasCancel; } 99 100 static bool classof(const CGCapturedStmtInfo *Info) { 101 return Info->getKind() == CR_OpenMP; 102 } 103 104 ~CGOpenMPRegionInfo() override = default; 105 106 protected: 107 CGOpenMPRegionKind RegionKind; 108 RegionCodeGenTy CodeGen; 109 OpenMPDirectiveKind Kind; 110 bool HasCancel; 111 }; 112 113 /// API for captured statement code generation in OpenMP constructs. 114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 115 public: 116 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 117 const RegionCodeGenTy &CodeGen, 118 OpenMPDirectiveKind Kind, bool HasCancel, 119 StringRef HelperName) 120 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 121 HasCancel), 122 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 123 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 124 } 125 126 /// Get a variable or parameter for storing global thread id 127 /// inside OpenMP construct. 128 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 129 130 /// Get the name of the capture helper. 131 StringRef getHelperName() const override { return HelperName; } 132 133 static bool classof(const CGCapturedStmtInfo *Info) { 134 return CGOpenMPRegionInfo::classof(Info) && 135 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 136 ParallelOutlinedRegion; 137 } 138 139 private: 140 /// A variable or parameter storing global thread id for OpenMP 141 /// constructs. 142 const VarDecl *ThreadIDVar; 143 StringRef HelperName; 144 }; 145 146 /// API for captured statement code generation in OpenMP constructs. 
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 148 public: 149 class UntiedTaskActionTy final : public PrePostActionTy { 150 bool Untied; 151 const VarDecl *PartIDVar; 152 const RegionCodeGenTy UntiedCodeGen; 153 llvm::SwitchInst *UntiedSwitch = nullptr; 154 155 public: 156 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 157 const RegionCodeGenTy &UntiedCodeGen) 158 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 159 void Enter(CodeGenFunction &CGF) override { 160 if (Untied) { 161 // Emit task switching point. 162 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 163 CGF.GetAddrOfLocalVar(PartIDVar), 164 PartIDVar->getType()->castAs<PointerType>()); 165 llvm::Value *Res = 166 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 167 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 168 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 169 CGF.EmitBlock(DoneBB); 170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 172 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 173 CGF.Builder.GetInsertBlock()); 174 emitUntiedSwitch(CGF); 175 } 176 } 177 void emitUntiedSwitch(CodeGenFunction &CGF) const { 178 if (Untied) { 179 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 180 CGF.GetAddrOfLocalVar(PartIDVar), 181 PartIDVar->getType()->castAs<PointerType>()); 182 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 183 PartIdLVal); 184 UntiedCodeGen(CGF); 185 CodeGenFunction::JumpDest CurPoint = 186 CGF.getJumpDestInCurrentScope(".untied.next."); 187 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 188 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 189 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 190 CGF.Builder.GetInsertBlock()); 191 CGF.EmitBranchThroughCleanup(CurPoint); 192 CGF.EmitBlock(CurPoint.getBlock()); 193 } 194 } 195 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 196 }; 197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 198 const VarDecl *ThreadIDVar, 199 const RegionCodeGenTy &CodeGen, 200 OpenMPDirectiveKind Kind, bool HasCancel, 201 const UntiedTaskActionTy &Action) 202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 203 ThreadIDVar(ThreadIDVar), Action(Action) { 204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 205 } 206 207 /// Get a variable or parameter for storing global thread id 208 /// inside OpenMP construct. 209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 210 211 /// Get an LValue for the current ThreadID variable. 212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 213 214 /// Get the name of the capture helper. 215 StringRef getHelperName() const override { return ".omp_outlined."; } 216 217 void emitUntiedSwitch(CodeGenFunction &CGF) override { 218 Action.emitUntiedSwitch(CGF); 219 } 220 221 static bool classof(const CGCapturedStmtInfo *Info) { 222 return CGOpenMPRegionInfo::classof(Info) && 223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 224 TaskOutlinedRegion; 225 } 226 227 private: 228 /// A variable or parameter storing global thread id for OpenMP 229 /// constructs. 230 const VarDecl *ThreadIDVar; 231 /// Action for emitting code for untied tasks. 232 const UntiedTaskActionTy &Action; 233 }; 234 235 /// API for inlined captured statement code generation in OpenMP 236 /// constructs. 237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 238 public: 239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 240 const RegionCodeGenTy &CodeGen, 241 OpenMPDirectiveKind Kind, bool HasCancel) 242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 243 OldCSI(OldCSI), 244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 245 246 // Retrieve the value of the context parameter. 
247 llvm::Value *getContextValue() const override { 248 if (OuterRegionInfo) 249 return OuterRegionInfo->getContextValue(); 250 llvm_unreachable("No context value for inlined OpenMP region"); 251 } 252 253 void setContextValue(llvm::Value *V) override { 254 if (OuterRegionInfo) { 255 OuterRegionInfo->setContextValue(V); 256 return; 257 } 258 llvm_unreachable("No context value for inlined OpenMP region"); 259 } 260 261 /// Lookup the captured field decl for a variable. 262 const FieldDecl *lookup(const VarDecl *VD) const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->lookup(VD); 265 // If there is no outer outlined region,no need to lookup in a list of 266 // captured variables, we can use the original one. 267 return nullptr; 268 } 269 270 FieldDecl *getThisFieldDecl() const override { 271 if (OuterRegionInfo) 272 return OuterRegionInfo->getThisFieldDecl(); 273 return nullptr; 274 } 275 276 /// Get a variable or parameter for storing global thread id 277 /// inside OpenMP construct. 278 const VarDecl *getThreadIDVariable() const override { 279 if (OuterRegionInfo) 280 return OuterRegionInfo->getThreadIDVariable(); 281 return nullptr; 282 } 283 284 /// Get an LValue for the current ThreadID variable. 285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 286 if (OuterRegionInfo) 287 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 288 llvm_unreachable("No LValue for inlined OpenMP construct"); 289 } 290 291 /// Get the name of the capture helper. 
292 StringRef getHelperName() const override { 293 if (auto *OuterRegionInfo = getOldCSI()) 294 return OuterRegionInfo->getHelperName(); 295 llvm_unreachable("No helper name for inlined OpenMP construct"); 296 } 297 298 void emitUntiedSwitch(CodeGenFunction &CGF) override { 299 if (OuterRegionInfo) 300 OuterRegionInfo->emitUntiedSwitch(CGF); 301 } 302 303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 304 305 static bool classof(const CGCapturedStmtInfo *Info) { 306 return CGOpenMPRegionInfo::classof(Info) && 307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 308 } 309 310 ~CGOpenMPInlinedRegionInfo() override = default; 311 312 private: 313 /// CodeGen info about outer OpenMP region. 314 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 315 CGOpenMPRegionInfo *OuterRegionInfo; 316 }; 317 318 /// API for captured statement code generation in OpenMP target 319 /// constructs. For this captures, implicit parameters are used instead of the 320 /// captured fields. The name of the target region has to be unique in a given 321 /// application so it is provided by the client, because only the client has 322 /// the information to generate that. 323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 324 public: 325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 326 const RegionCodeGenTy &CodeGen, StringRef HelperName) 327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 328 /*HasCancel=*/false), 329 HelperName(HelperName) {} 330 331 /// This is unused for target regions because each starts executing 332 /// with a single thread. 333 const VarDecl *getThreadIDVariable() const override { return nullptr; } 334 335 /// Get the name of the capture helper. 
336 StringRef getHelperName() const override { return HelperName; } 337 338 static bool classof(const CGCapturedStmtInfo *Info) { 339 return CGOpenMPRegionInfo::classof(Info) && 340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 341 } 342 343 private: 344 StringRef HelperName; 345 }; 346 347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 348 llvm_unreachable("No codegen for expressions"); 349 } 350 /// API for generation of expressions captured in a innermost OpenMP 351 /// region. 352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 353 public: 354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 356 OMPD_unknown, 357 /*HasCancel=*/false), 358 PrivScope(CGF) { 359 // Make sure the globals captured in the provided statement are local by 360 // using the privatization logic. We assume the same variable is not 361 // captured more than once. 362 for (const auto &C : CS.captures()) { 363 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 364 continue; 365 366 const VarDecl *VD = C.getCapturedVar(); 367 if (VD->isLocalVarDeclOrParm()) 368 continue; 369 370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 371 /*RefersToEnclosingVariableOrCapture=*/false, 372 VD->getType().getNonReferenceType(), VK_LValue, 373 C.getLocation()); 374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); 375 } 376 (void)PrivScope.Privatize(); 377 } 378 379 /// Lookup the captured field decl for a variable. 380 const FieldDecl *lookup(const VarDecl *VD) const override { 381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 382 return FD; 383 return nullptr; 384 } 385 386 /// Emit the captured statement body. 
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 388 llvm_unreachable("No body for expressions"); 389 } 390 391 /// Get a variable or parameter for storing global thread id 392 /// inside OpenMP construct. 393 const VarDecl *getThreadIDVariable() const override { 394 llvm_unreachable("No thread id for expressions"); 395 } 396 397 /// Get the name of the capture helper. 398 StringRef getHelperName() const override { 399 llvm_unreachable("No helper name for expressions"); 400 } 401 402 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 403 404 private: 405 /// Private scope to capture global variables. 406 CodeGenFunction::OMPPrivateScope PrivScope; 407 }; 408 409 /// RAII for emitting code of OpenMP constructs. 410 class InlinedOpenMPRegionRAII { 411 CodeGenFunction &CGF; 412 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 413 FieldDecl *LambdaThisCaptureField = nullptr; 414 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 415 bool NoInheritance = false; 416 417 public: 418 /// Constructs region for combined constructs. 419 /// \param CodeGen Code generation sequence for combined directives. Includes 420 /// a list of functions used for code generation of implicitly inlined 421 /// regions. 422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 423 OpenMPDirectiveKind Kind, bool HasCancel, 424 bool NoInheritance = true) 425 : CGF(CGF), NoInheritance(NoInheritance) { 426 // Start emission for the construct. 427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 429 if (NoInheritance) { 430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 431 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 432 CGF.LambdaThisCaptureField = nullptr; 433 BlockInfo = CGF.BlockInfo; 434 CGF.BlockInfo = nullptr; 435 } 436 } 437 438 ~InlinedOpenMPRegionRAII() { 439 // Restore original CapturedStmtInfo only if we're done with code emission. 
440 auto *OldCSI = 441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 442 delete CGF.CapturedStmtInfo; 443 CGF.CapturedStmtInfo = OldCSI; 444 if (NoInheritance) { 445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 446 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 447 CGF.BlockInfo = BlockInfo; 448 } 449 } 450 }; 451 452 /// Values for bit flags used in the ident_t to describe the fields. 453 /// All enumeric elements are named and described in accordance with the code 454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 455 enum OpenMPLocationFlags : unsigned { 456 /// Use trampoline for internal microtask. 457 OMP_IDENT_IMD = 0x01, 458 /// Use c-style ident structure. 459 OMP_IDENT_KMPC = 0x02, 460 /// Atomic reduction option for kmpc_reduce. 461 OMP_ATOMIC_REDUCE = 0x10, 462 /// Explicit 'barrier' directive. 463 OMP_IDENT_BARRIER_EXPL = 0x20, 464 /// Implicit barrier in code. 465 OMP_IDENT_BARRIER_IMPL = 0x40, 466 /// Implicit barrier in 'for' directive. 467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 468 /// Implicit barrier in 'sections' directive. 469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 470 /// Implicit barrier in 'single' directive. 471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 472 /// Call of __kmp_for_static_init for static loop. 473 OMP_IDENT_WORK_LOOP = 0x200, 474 /// Call of __kmp_for_static_init for sections. 475 OMP_IDENT_WORK_SECTIONS = 0x400, 476 /// Call of __kmp_for_static_init for distribute. 477 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 479 }; 480 481 namespace { 482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 483 /// Values for bit flags for marking which requires clauses have been used. 484 enum OpenMPOffloadingRequiresDirFlags : int64_t { 485 /// flag undefined. 486 OMP_REQ_UNDEFINED = 0x000, 487 /// no requires clause present. 488 OMP_REQ_NONE = 0x001, 489 /// reverse_offload clause. 
490 OMP_REQ_REVERSE_OFFLOAD = 0x002, 491 /// unified_address clause. 492 OMP_REQ_UNIFIED_ADDRESS = 0x004, 493 /// unified_shared_memory clause. 494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 495 /// dynamic_allocators clause. 496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 498 }; 499 500 enum OpenMPOffloadingReservedDeviceIDs { 501 /// Device ID if the device was not defined, runtime should get it 502 /// from environment variables in the spec. 503 OMP_DEVICEID_UNDEF = -1, 504 }; 505 } // anonymous namespace 506 507 /// Describes ident structure that describes a source location. 508 /// All descriptions are taken from 509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 510 /// Original structure: 511 /// typedef struct ident { 512 /// kmp_int32 reserved_1; /**< might be used in Fortran; 513 /// see above */ 514 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 515 /// KMP_IDENT_KMPC identifies this union 516 /// member */ 517 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 518 /// see above */ 519 ///#if USE_ITT_BUILD 520 /// /* but currently used for storing 521 /// region-specific ITT */ 522 /// /* contextual information. */ 523 ///#endif /* USE_ITT_BUILD */ 524 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 525 /// C++ */ 526 /// char const *psource; /**< String describing the source location. 527 /// The string is composed of semi-colon separated 528 // fields which describe the source file, 529 /// the function and a pair of line numbers that 530 /// delimit the construct. 531 /// */ 532 /// } ident_t; 533 enum IdentFieldIndex { 534 /// might be used in Fortran 535 IdentField_Reserved_1, 536 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
537 IdentField_Flags, 538 /// Not really used in Fortran any more 539 IdentField_Reserved_2, 540 /// Source[4] in Fortran, do not use for C++ 541 IdentField_Reserved_3, 542 /// String describing the source location. The string is composed of 543 /// semi-colon separated fields which describe the source file, the function 544 /// and a pair of line numbers that delimit the construct. 545 IdentField_PSource 546 }; 547 548 /// Schedule types for 'omp for' loops (these enumerators are taken from 549 /// the enum sched_type in kmp.h). 550 enum OpenMPSchedType { 551 /// Lower bound for default (unordered) versions. 552 OMP_sch_lower = 32, 553 OMP_sch_static_chunked = 33, 554 OMP_sch_static = 34, 555 OMP_sch_dynamic_chunked = 35, 556 OMP_sch_guided_chunked = 36, 557 OMP_sch_runtime = 37, 558 OMP_sch_auto = 38, 559 /// static with chunk adjustment (e.g., simd) 560 OMP_sch_static_balanced_chunked = 45, 561 /// Lower bound for 'ordered' versions. 562 OMP_ord_lower = 64, 563 OMP_ord_static_chunked = 65, 564 OMP_ord_static = 66, 565 OMP_ord_dynamic_chunked = 67, 566 OMP_ord_guided_chunked = 68, 567 OMP_ord_runtime = 69, 568 OMP_ord_auto = 70, 569 OMP_sch_default = OMP_sch_static, 570 /// dist_schedule types 571 OMP_dist_sch_static_chunked = 91, 572 OMP_dist_sch_static = 92, 573 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 574 /// Set if the monotonic schedule modifier was present. 575 OMP_sch_modifier_monotonic = (1 << 29), 576 /// Set if the nonmonotonic schedule modifier was present. 577 OMP_sch_modifier_nonmonotonic = (1 << 30), 578 }; 579 580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 581 /// region. 
582 class CleanupTy final : public EHScopeStack::Cleanup { 583 PrePostActionTy *Action; 584 585 public: 586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 588 if (!CGF.HaveInsertPoint()) 589 return; 590 Action->Exit(CGF); 591 } 592 }; 593 594 } // anonymous namespace 595 596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 597 CodeGenFunction::RunCleanupsScope Scope(CGF); 598 if (PrePostAction) { 599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 600 Callback(CodeGen, CGF, *PrePostAction); 601 } else { 602 PrePostActionTy Action; 603 Callback(CodeGen, CGF, Action); 604 } 605 } 606 607 /// Check if the combiner is a call to UDR combiner and if it is so return the 608 /// UDR decl used for reduction. 609 static const OMPDeclareReductionDecl * 610 getReductionInit(const Expr *ReductionOp) { 611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 613 if (const auto *DRE = 614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 616 return DRD; 617 return nullptr; 618 } 619 620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 621 const OMPDeclareReductionDecl *DRD, 622 const Expr *InitOp, 623 Address Private, Address Original, 624 QualType Ty) { 625 if (DRD->getInitializer()) { 626 std::pair<llvm::Function *, llvm::Function *> Reduction = 627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 628 const auto *CE = cast<CallExpr>(InitOp); 629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 632 const auto *LHSDRE = 633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 634 const auto *RHSDRE = 635 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); 638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); 639 (void)PrivateScope.Privatize(); 640 RValue Func = RValue::get(Reduction.second); 641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 642 CGF.EmitIgnoredExpr(InitOp); 643 } else { 644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 646 auto *GV = new llvm::GlobalVariable( 647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 648 llvm::GlobalValue::PrivateLinkage, Init, Name); 649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 650 RValue InitRVal; 651 switch (CGF.getEvaluationKind(Ty)) { 652 case TEK_Scalar: 653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 654 break; 655 case TEK_Complex: 656 InitRVal = 657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 658 break; 659 case TEK_Aggregate: { 660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 663 /*IsInitializer=*/false); 664 return; 665 } 666 } 667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); 668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 670 /*IsInitializer=*/false); 671 } 672 } 673 674 /// Emit initialization of arrays of complex types. 675 /// \param DestAddr Address of the array. 676 /// \param Type Type of array. 677 /// \param Init Initial expression of array. 678 /// \param SrcAddr Address of the original array. 
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 if (DRD) 691 SrcAddr = 692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 693 694 llvm::Value *SrcBegin = nullptr; 695 if (DRD) 696 SrcBegin = SrcAddr.getPointer(); 697 llvm::Value *DestBegin = DestAddr.getPointer(); 698 // Cast from pointer to array type to pointer to single element. 699 llvm::Value *DestEnd = 700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, SrcAddr.getElementType(), 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, DestAddr.getElementType(), 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 746 "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 753 "omp.arraycpy.dest.element"); 754 // Check whether we've reached the end. 
755 llvm::Value *Done = 756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 759 760 // Done. 761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 762 } 763 764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 765 return CGF.EmitOMPSharedLValue(E); 766 } 767 768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 769 const Expr *E) { 770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 772 return LValue(); 773 } 774 775 void ReductionCodeGen::emitAggregateInitialization( 776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 777 const OMPDeclareReductionDecl *DRD) { 778 // Emit VarDecl with copy init for arrays. 779 // Get the address of the original variable captured in current 780 // captured region. 781 const auto *PrivateVD = 782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 783 bool EmitDeclareReductionInit = 784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 786 EmitDeclareReductionInit, 787 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 788 : PrivateVD->getInit(), 789 DRD, SharedAddr); 790 } 791 792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 793 ArrayRef<const Expr *> Origs, 794 ArrayRef<const Expr *> Privates, 795 ArrayRef<const Expr *> ReductionOps) { 796 ClausesData.reserve(Shareds.size()); 797 SharedAddresses.reserve(Shareds.size()); 798 Sizes.reserve(Shareds.size()); 799 BaseDecls.reserve(Shareds.size()); 800 const auto *IOrig = Origs.begin(); 801 const auto *IPriv = Privates.begin(); 802 const auto *IRed = ReductionOps.begin(); 803 for (const Expr *Ref : Shareds) { 804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 805 std::advance(IOrig, 1); 806 std::advance(IPriv, 1); 807 std::advance(IRed, 1); 808 } 809 } 810 811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 813 "Number of generated lvalues must be exactly N."); 814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 816 SharedAddresses.emplace_back(First, Second); 817 if (ClausesData[N].Shared == ClausesData[N].Ref) { 818 OrigAddresses.emplace_back(First, Second); 819 } else { 820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 822 OrigAddresses.emplace_back(First, Second); 823 } 824 } 825 826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 827 QualType PrivateType = getPrivateType(N); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value *SizeInChars; 837 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); 838 auto *ElemSizeOf = 
llvm::ConstantExpr::getSizeOf(ElemType); 839 if (AsArraySection) { 840 Size = CGF.Builder.CreatePtrDiff(ElemType, 841 OrigAddresses[N].second.getPointer(CGF), 842 OrigAddresses[N].first.getPointer(CGF)); 843 Size = CGF.Builder.CreateNUWAdd( 844 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 845 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 846 } else { 847 SizeInChars = 848 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 849 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 850 } 851 Sizes.emplace_back(SizeInChars, Size); 852 CodeGenFunction::OpaqueValueMapping OpaqueMap( 853 CGF, 854 cast<OpaqueValueExpr>( 855 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 856 RValue::get(Size)); 857 CGF.EmitVariablyModifiedType(PrivateType); 858 } 859 860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 861 llvm::Value *Size) { 862 QualType PrivateType = getPrivateType(N); 863 if (!PrivateType->isVariablyModifiedType()) { 864 assert(!Size && !Sizes[N].second && 865 "Size should be nullptr for non-variably modified reduction " 866 "items."); 867 return; 868 } 869 CodeGenFunction::OpaqueValueMapping OpaqueMap( 870 CGF, 871 cast<OpaqueValueExpr>( 872 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 873 RValue::get(Size)); 874 CGF.EmitVariablyModifiedType(PrivateType); 875 } 876 877 void ReductionCodeGen::emitInitialization( 878 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 879 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 880 assert(SharedAddresses.size() > N && "No variable was generated"); 881 const auto *PrivateVD = 882 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 883 const OMPDeclareReductionDecl *DRD = 884 getReductionInit(ClausesData[N].ReductionOp); 885 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 886 if (DRD && DRD->getInitializer()) 887 (void)DefaultInit(CGF); 888 
emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); 889 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 890 (void)DefaultInit(CGF); 891 QualType SharedType = SharedAddresses[N].first.getType(); 892 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 893 PrivateAddr, SharedAddr, SharedType); 894 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 895 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 896 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 897 PrivateVD->getType().getQualifiers(), 898 /*IsInitializer=*/false); 899 } 900 } 901 902 bool ReductionCodeGen::needCleanups(unsigned N) { 903 QualType PrivateType = getPrivateType(N); 904 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 905 return DTorKind != QualType::DK_none; 906 } 907 908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 909 Address PrivateAddr) { 910 QualType PrivateType = getPrivateType(N); 911 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 912 if (needCleanups(N)) { 913 PrivateAddr = CGF.Builder.CreateElementBitCast( 914 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 915 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 916 } 917 } 918 919 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 920 LValue BaseLV) { 921 BaseTy = BaseTy.getNonReferenceType(); 922 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 923 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 924 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 925 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 926 } else { 927 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 928 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 929 } 930 BaseTy = BaseTy->getPointeeType(); 931 } 932 return CGF.MakeAddrLValue( 933 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 934 CGF.ConvertTypeForMem(ElTy)), 935 
BaseLV.getType(), BaseLV.getBaseInfo(), 936 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 937 } 938 939 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 940 Address OriginalBaseAddress, llvm::Value *Addr) { 941 Address Tmp = Address::invalid(); 942 Address TopTmp = Address::invalid(); 943 Address MostTopTmp = Address::invalid(); 944 BaseTy = BaseTy.getNonReferenceType(); 945 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 946 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 947 Tmp = CGF.CreateMemTemp(BaseTy); 948 if (TopTmp.isValid()) 949 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 950 else 951 MostTopTmp = Tmp; 952 TopTmp = Tmp; 953 BaseTy = BaseTy->getPointeeType(); 954 } 955 956 if (Tmp.isValid()) { 957 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 958 Addr, Tmp.getElementType()); 959 CGF.Builder.CreateStore(Addr, Tmp); 960 return MostTopTmp; 961 } 962 963 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 964 Addr, OriginalBaseAddress.getType()); 965 return OriginalBaseAddress.withPointer(Addr); 966 } 967 968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 969 const VarDecl *OrigVD = nullptr; 970 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 971 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 972 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 973 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 974 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 975 Base = TempASE->getBase()->IgnoreParenImpCasts(); 976 DE = cast<DeclRefExpr>(Base); 977 OrigVD = cast<VarDecl>(DE->getDecl()); 978 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 979 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 981 Base = TempASE->getBase()->IgnoreParenImpCasts(); 982 DE = cast<DeclRefExpr>(Base); 983 OrigVD = 
cast<VarDecl>(DE->getDecl()); 984 } 985 return OrigVD; 986 } 987 988 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 989 Address PrivateAddr) { 990 const DeclRefExpr *DE; 991 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 992 BaseDecls.emplace_back(OrigVD); 993 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 994 LValue BaseLValue = 995 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 996 OriginalBaseLValue); 997 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); 998 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 999 SharedAddr.getElementType(), BaseLValue.getPointer(CGF), 1000 SharedAddr.getPointer()); 1001 llvm::Value *PrivatePointer = 1002 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1003 PrivateAddr.getPointer(), SharedAddr.getType()); 1004 llvm::Value *Ptr = CGF.Builder.CreateGEP( 1005 SharedAddr.getElementType(), PrivatePointer, Adjustment); 1006 return castToBase(CGF, OrigVD->getType(), 1007 SharedAddresses[N].first.getType(), 1008 OriginalBaseLValue.getAddress(CGF), Ptr); 1009 } 1010 BaseDecls.emplace_back( 1011 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1012 return PrivateAddr; 1013 } 1014 1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1016 const OMPDeclareReductionDecl *DRD = 1017 getReductionInit(ClausesData[N].ReductionOp); 1018 return DRD && DRD->getInitializer(); 1019 } 1020 1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1022 return CGF.EmitLoadOfPointerLValue( 1023 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1024 getThreadIDVariable()->getType()->castAs<PointerType>()); 1025 } 1026 1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1028 if (!CGF.HaveInsertPoint()) 1029 return; 1030 // 1.2.2 OpenMP Language Terminology 1031 // Structured block - An executable statement with a single entry at the 1032 // top and a single exit at the bottom. 
1033 // The point of exit cannot be a branch out of the structured block. 1034 // longjmp() and throw() must not violate the entry/exit criteria. 1035 CGF.EHStack.pushTerminate(); 1036 if (S) 1037 CGF.incrementProfileCounter(S); 1038 CodeGen(CGF); 1039 CGF.EHStack.popTerminate(); 1040 } 1041 1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1043 CodeGenFunction &CGF) { 1044 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1045 getThreadIDVariable()->getType(), 1046 AlignmentSource::Decl); 1047 } 1048 1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1050 QualType FieldTy) { 1051 auto *Field = FieldDecl::Create( 1052 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1053 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1054 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1055 Field->setAccess(AS_public); 1056 DC->addDecl(Field); 1057 return Field; 1058 } 1059 1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1061 StringRef Separator) 1062 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1063 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1064 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1065 1066 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1067 OMPBuilder.initialize(); 1068 loadOffloadInfoMetadata(); 1069 } 1070 1071 void CGOpenMPRuntime::clear() { 1072 InternalVars.clear(); 1073 // Clean non-target variable declarations possibly used only in debug info. 
1074 for (const auto &Data : EmittedNonTargetVariables) { 1075 if (!Data.getValue().pointsToAliveValue()) 1076 continue; 1077 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1078 if (!GV) 1079 continue; 1080 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1081 continue; 1082 GV->eraseFromParent(); 1083 } 1084 } 1085 1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1087 SmallString<128> Buffer; 1088 llvm::raw_svector_ostream OS(Buffer); 1089 StringRef Sep = FirstSeparator; 1090 for (StringRef Part : Parts) { 1091 OS << Sep << Part; 1092 Sep = Separator; 1093 } 1094 return std::string(OS.str()); 1095 } 1096 1097 static llvm::Function * 1098 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1099 const Expr *CombinerInitializer, const VarDecl *In, 1100 const VarDecl *Out, bool IsCombiner) { 1101 // void .omp_combiner.(Ty *in, Ty *out); 1102 ASTContext &C = CGM.getContext(); 1103 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1104 FunctionArgList Args; 1105 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1106 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1107 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 Args.push_back(&OmpOutParm); 1110 Args.push_back(&OmpInParm); 1111 const CGFunctionInfo &FnInfo = 1112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1113 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1114 std::string Name = CGM.getOpenMPRuntime().getName( 1115 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1116 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1117 Name, &CGM.getModule()); 1118 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1119 if (CGM.getLangOpts().Optimize) { 1120 Fn->removeFnAttr(llvm::Attribute::NoInline); 1121 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1122 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1123 } 1124 CodeGenFunction CGF(CGM); 1125 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1126 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1127 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1128 Out->getLocation()); 1129 CodeGenFunction::OMPPrivateScope Scope(CGF); 1130 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1131 Scope.addPrivate( 1132 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1133 .getAddress(CGF)); 1134 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1135 Scope.addPrivate( 1136 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1137 .getAddress(CGF)); 1138 (void)Scope.Privatize(); 1139 if (!IsCombiner && Out->hasInit() && 1140 !CGF.isTrivialInitializer(Out->getInit())) { 1141 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1142 Out->getType().getQualifiers(), 1143 /*IsInitializer=*/true); 1144 } 1145 if (CombinerInitializer) 1146 CGF.EmitIgnoredExpr(CombinerInitializer); 1147 Scope.ForceCleanup(); 1148 CGF.FinishFunction(); 1149 return Fn; 1150 } 1151 1152 void CGOpenMPRuntime::emitUserDefinedReduction( 1153 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1154 if (UDRMap.count(D) > 0) 1155 return; 1156 llvm::Function *Combiner = emitCombinerOrInitializer( 1157 CGM, D->getType(), D->getCombiner(), 1158 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1159 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1160 /*IsCombiner=*/true); 1161 
llvm::Function *Initializer = nullptr; 1162 if (const Expr *Init = D->getInitializer()) { 1163 Initializer = emitCombinerOrInitializer( 1164 CGM, D->getType(), 1165 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1166 : nullptr, 1167 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1168 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1169 /*IsCombiner=*/false); 1170 } 1171 UDRMap.try_emplace(D, Combiner, Initializer); 1172 if (CGF) { 1173 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1174 Decls.second.push_back(D); 1175 } 1176 } 1177 1178 std::pair<llvm::Function *, llvm::Function *> 1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1180 auto I = UDRMap.find(D); 1181 if (I != UDRMap.end()) 1182 return I->second; 1183 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1184 return UDRMap.lookup(D); 1185 } 1186 1187 namespace { 1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1189 // Builder if one is present. 1190 struct PushAndPopStackRAII { 1191 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1192 bool HasCancel, llvm::omp::Directive Kind) 1193 : OMPBuilder(OMPBuilder) { 1194 if (!OMPBuilder) 1195 return; 1196 1197 // The following callback is the crucial part of clangs cleanup process. 1198 // 1199 // NOTE: 1200 // Once the OpenMPIRBuilder is used to create parallel regions (and 1201 // similar), the cancellation destination (Dest below) is determined via 1202 // IP. That means if we have variables to finalize we split the block at IP, 1203 // use the new block (=BB) as destination to build a JumpDest (via 1204 // getJumpDestInCurrentScope(BB)) which then is fed to 1205 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1206 // to push & pop an FinalizationInfo object. 
1207 // The FiniCB will still be needed but at the point where the 1208 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1209 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1210 assert(IP.getBlock()->end() == IP.getPoint() && 1211 "Clang CG should cause non-terminated block!"); 1212 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1213 CGF.Builder.restoreIP(IP); 1214 CodeGenFunction::JumpDest Dest = 1215 CGF.getOMPCancelDestination(OMPD_parallel); 1216 CGF.EmitBranchThroughCleanup(Dest); 1217 }; 1218 1219 // TODO: Remove this once we emit parallel regions through the 1220 // OpenMPIRBuilder as it can do this setup internally. 1221 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1222 OMPBuilder->pushFinalizationCB(std::move(FI)); 1223 } 1224 ~PushAndPopStackRAII() { 1225 if (OMPBuilder) 1226 OMPBuilder->popFinalizationCB(); 1227 } 1228 llvm::OpenMPIRBuilder *OMPBuilder; 1229 }; 1230 } // namespace 1231 1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1233 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1234 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1235 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1236 assert(ThreadIDVar->getType()->isPointerType() && 1237 "thread id variable must be of type kmp_int32 *"); 1238 CodeGenFunction CGF(CGM, true); 1239 bool HasCancel = false; 1240 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1241 HasCancel = OPD->hasCancel(); 1242 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1243 HasCancel = OPD->hasCancel(); 1244 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1245 HasCancel = OPSD->hasCancel(); 1246 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1247 HasCancel = OPFD->hasCancel(); 1248 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1249 HasCancel = OPFD->hasCancel(); 1250 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1251 HasCancel = OPFD->hasCancel(); 1252 else if (const auto *OPFD = 1253 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1254 HasCancel = OPFD->hasCancel(); 1255 else if (const auto *OPFD = 1256 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1257 HasCancel = OPFD->hasCancel(); 1258 1259 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1260 // parallel region to make cancellation barriers work properly. 1261 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1262 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1263 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1264 HasCancel, OutlinedHelperName); 1265 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1266 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1267 } 1268 1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1270 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1271 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1273 return emitParallelOrTeamsOutlinedFunction( 1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1275 } 1276 1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1278 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1279 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1280 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1281 return emitParallelOrTeamsOutlinedFunction( 1282 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1283 } 1284 1285 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1286 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1287 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1288 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1289 bool Tied, unsigned &NumberOfParts) { 1290 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1291 PrePostActionTy &) { 1292 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1293 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1294 llvm::Value *TaskArgs[] = { 1295 UpLoc, ThreadID, 1296 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1297 TaskTVar->getType()->castAs<PointerType>()) 1298 .getPointer(CGF)}; 1299 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1300 CGM.getModule(), OMPRTL___kmpc_omp_task), 1301 TaskArgs); 1302 }; 1303 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1304 UntiedCodeGen); 1305 CodeGen.setAction(Action); 1306 assert(!ThreadIDVar->getType()->isPointerType() && 1307 "thread id variable must be of type kmp_int32 for tasks"); 1308 const OpenMPDirectiveKind Region = 1309 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1310 : OMPD_task; 1311 const CapturedStmt *CS = D.getCapturedStmt(Region); 1312 bool HasCancel = false; 1313 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1314 HasCancel = TD->hasCancel(); 1315 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1316 HasCancel = TD->hasCancel(); 1317 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1318 HasCancel = TD->hasCancel(); 1319 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1320 HasCancel = TD->hasCancel(); 1321 1322 CodeGenFunction CGF(CGM, true); 1323 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1324 InnermostKind, HasCancel, Action); 1325 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1326 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1327 if (!Tied) 1328 NumberOfParts = Action.getNumberOfParts(); 1329 return Res; 1330 } 1331 1332 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1333 const RecordDecl *RD, 
const CGRecordLayout &RL, 1334 ArrayRef<llvm::Constant *> Data) { 1335 llvm::StructType *StructTy = RL.getLLVMType(); 1336 unsigned PrevIdx = 0; 1337 ConstantInitBuilder CIBuilder(CGM); 1338 const auto *DI = Data.begin(); 1339 for (const FieldDecl *FD : RD->fields()) { 1340 unsigned Idx = RL.getLLVMFieldNo(FD); 1341 // Fill the alignment. 1342 for (unsigned I = PrevIdx; I < Idx; ++I) 1343 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1344 PrevIdx = Idx + 1; 1345 Fields.add(*DI); 1346 ++DI; 1347 } 1348 } 1349 1350 template <class... As> 1351 static llvm::GlobalVariable * 1352 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1353 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1354 As &&... Args) { 1355 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1356 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1357 ConstantInitBuilder CIBuilder(CGM); 1358 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1359 buildStructValue(Fields, CGM, RD, RL, Data); 1360 return Fields.finishAndCreateGlobal( 1361 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1362 std::forward<As>(Args)...); 1363 } 1364 1365 template <typename T> 1366 static void 1367 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1368 ArrayRef<llvm::Constant *> Data, 1369 T &Parent) { 1370 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1371 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1372 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1373 buildStructValue(Fields, CGM, RD, RL, Data); 1374 Fields.finishAndAddTo(Parent); 1375 } 1376 1377 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1378 bool AtCurrentPoint) { 1379 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1380 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1381 1382 llvm::Value *Undef = 
/// Get or create the ident value describing source location \p Loc.
///
/// The default source-location string is used when debug info is disabled or
/// \p Loc is invalid; otherwise the string is built from the current
/// function's qualified name (empty if there is no current FunctionDecl) and
/// the presumed file, line and column of \p Loc.
///
/// \param Flags Converted to llvm::omp::IdentFlag and passed to the ident.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t LocStrSize;
  llvm::Constant *LocStr;
  const bool UseDefaultLoc =
      CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid();
  if (UseDefaultLoc) {
    LocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(LocStrSize);
  } else {
    std::string FnName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FnName = FD->getQualifiedNameAsString();
    PresumedLoc Presumed =
        CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    LocStr = OMPBuilder.getOrCreateSrcLocStr(FnName, Presumed.getFilename(),
                                             Presumed.getLine(),
                                             Presumed.getColumn(), LocStrSize);
  }
  return OMPBuilder.getOrCreateIdent(LocStr, LocStrSize,
                                     llvm::omp::IdentFlag(Flags),
                                     getDefaultLocationReserved2Flags());
}
1468 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1469 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1470 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1471 !CGF.getLangOpts().CXXExceptions || 1472 CGF.Builder.GetInsertBlock() == TopBlock || 1473 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1474 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1475 TopBlock || 1476 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1477 CGF.Builder.GetInsertBlock()) { 1478 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1479 // If value loaded in entry block, cache it and use it everywhere in 1480 // function. 1481 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1482 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1483 Elem.second.ThreadID = ThreadID; 1484 } 1485 return ThreadID; 1486 } 1487 } 1488 } 1489 1490 // This is not an outlined function region - need to call __kmpc_int32 1491 // kmpc_global_thread_num(ident_t *loc). 1492 // Generate thread id value and cache this value for use across the 1493 // function. 
1494 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1495 if (!Elem.second.ServiceInsertPt) 1496 setLocThreadIdInsertPt(CGF); 1497 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1498 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1499 llvm::CallInst *Call = CGF.Builder.CreateCall( 1500 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1501 OMPRTL___kmpc_global_thread_num), 1502 emitUpdateLocation(CGF, Loc)); 1503 Call->setCallingConv(CGF.getRuntimeCC()); 1504 Elem.second.ThreadID = Call; 1505 return Call; 1506 } 1507 1508 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1509 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1510 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1511 clearLocThreadIdInsertPt(CGF); 1512 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1513 } 1514 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1515 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1516 UDRMap.erase(D); 1517 FunctionUDRMap.erase(CGF.CurFn); 1518 } 1519 auto I = FunctionUDMMap.find(CGF.CurFn); 1520 if (I != FunctionUDMMap.end()) { 1521 for(const auto *D : I->second) 1522 UDMMap.erase(D); 1523 FunctionUDMMap.erase(I); 1524 } 1525 LastprivateConditionalToTypes.erase(CGF.CurFn); 1526 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1527 } 1528 1529 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1530 return OMPBuilder.IdentPtr; 1531 } 1532 1533 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1534 if (!Kmpc_MicroTy) { 1535 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1536 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1537 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1538 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1539 } 1540 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1541 } 1542 1543 llvm::FunctionCallee 1544 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, 1545 bool IsGPUDistribute) { 1546 assert((IVSize == 32 || IVSize == 64) && 1547 "IV size is not compatible with the omp runtime"); 1548 StringRef Name; 1549 if (IsGPUDistribute) 1550 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" 1551 : "__kmpc_distribute_static_init_4u") 1552 : (IVSigned ? "__kmpc_distribute_static_init_8" 1553 : "__kmpc_distribute_static_init_8u"); 1554 else 1555 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1556 : "__kmpc_for_static_init_4u") 1557 : (IVSigned ? "__kmpc_for_static_init_8" 1558 : "__kmpc_for_static_init_8u"); 1559 1560 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1561 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1562 llvm::Type *TypeParams[] = { 1563 getIdentTyPointerTy(), // loc 1564 CGM.Int32Ty, // tid 1565 CGM.Int32Ty, // schedtype 1566 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1567 PtrTy, // p_lower 1568 PtrTy, // p_upper 1569 PtrTy, // p_stride 1570 ITy, // incr 1571 ITy // chunk 1572 }; 1573 auto *FnTy = 1574 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1575 return CGM.CreateRuntimeFunction(FnTy, Name); 1576 } 1577 1578 llvm::FunctionCallee 1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1580 assert((IVSize == 32 || IVSize == 64) && 1581 "IV size is not compatible with the omp runtime"); 1582 StringRef Name = 1583 IVSize == 32 1584 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1585 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1586 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1587 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1588 CGM.Int32Ty, // tid 1589 CGM.Int32Ty, // schedtype 1590 ITy, // lower 1591 ITy, // upper 1592 ITy, // stride 1593 ITy // chunk 1594 }; 1595 auto *FnTy = 1596 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1597 return CGM.CreateRuntimeFunction(FnTy, Name); 1598 } 1599 1600 llvm::FunctionCallee 1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1602 assert((IVSize == 32 || IVSize == 64) && 1603 "IV size is not compatible with the omp runtime"); 1604 StringRef Name = 1605 IVSize == 32 1606 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1607 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1608 llvm::Type *TypeParams[] = { 1609 getIdentTyPointerTy(), // loc 1610 CGM.Int32Ty, // tid 1611 }; 1612 auto *FnTy = 1613 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1614 return CGM.CreateRuntimeFunction(FnTy, Name); 1615 } 1616 1617 llvm::FunctionCallee 1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1619 assert((IVSize == 32 || IVSize == 64) && 1620 "IV size is not compatible with the omp runtime"); 1621 StringRef Name = 1622 IVSize == 32 1623 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1624 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1625 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1626 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1627 llvm::Type *TypeParams[] = { 1628 getIdentTyPointerTy(), // loc 1629 CGM.Int32Ty, // tid 1630 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1631 PtrTy, // p_lower 1632 PtrTy, // p_upper 1633 PtrTy // p_stride 1634 }; 1635 auto *FnTy = 1636 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1637 return CGM.CreateRuntimeFunction(FnTy, Name); 1638 } 1639 1640 /// Obtain information that uniquely identifies a target entry. This 1641 /// consists of the file and device IDs as well as line number associated with 1642 /// the relevant entry source location. 1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1644 unsigned &DeviceID, unsigned &FileID, 1645 unsigned &LineNum) { 1646 SourceManager &SM = C.getSourceManager(); 1647 1648 // The loc should be always valid and have a file ID (the user cannot use 1649 // #pragma directives in macros) 1650 1651 assert(Loc.isValid() && "Source location is expected to be always valid."); 1652 1653 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1654 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1655 1656 llvm::sys::fs::UniqueID ID; 1657 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1658 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1659 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1662 << PLoc.getFilename() << EC.message(); 1663 } 1664 1665 DeviceID = ID.getDevice(); 1666 FileID = ID.getFile(); 1667 LineNum = PLoc.getLine(); 1668 } 1669 1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1671 if (CGM.getLangOpts().OpenMPSimd) 1672 return Address::invalid(); 1673 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1674 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1675 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1676 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1677 HasRequiresUnifiedSharedMemory))) { 1678 SmallString<64> PtrName; 1679 { 1680 llvm::raw_svector_ostream OS(PtrName); 1681 OS << CGM.getMangledName(GlobalDecl(VD)); 1682 if (!VD->isExternallyVisible()) { 1683 unsigned DeviceID, FileID, Line; 1684 getTargetEntryUniqueInfo(CGM.getContext(), 1685 VD->getCanonicalDecl()->getBeginLoc(), 1686 DeviceID, FileID, Line); 1687 OS << llvm::format("_%x", FileID); 1688 } 1689 OS << "_decl_tgt_ref_ptr"; 1690 } 1691 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1692 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1693 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); 1694 if (!Ptr) { 1695 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); 1696 1697 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1698 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1699 1700 if (!CGM.getLangOpts().OpenMPIsDevice) 1701 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1702 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1703 } 1704 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); 1705 } 1706 return Address::invalid(); 1707 } 1708 1709 llvm::Constant * 1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1711 assert(!CGM.getLangOpts().OpenMPUseTLS || 1712 !CGM.getContext().getTargetInfo().isTLSSupported()); 1713 // Lookup the entry, lazily creating it if necessary. 
1714 std::string Suffix = getName({"cache", ""}); 1715 return getOrCreateInternalVariable( 1716 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1717 } 1718 1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1720 const VarDecl *VD, 1721 Address VDAddr, 1722 SourceLocation Loc) { 1723 if (CGM.getLangOpts().OpenMPUseTLS && 1724 CGM.getContext().getTargetInfo().isTLSSupported()) 1725 return VDAddr; 1726 1727 llvm::Type *VarTy = VDAddr.getElementType(); 1728 llvm::Value *Args[] = { 1729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1730 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), 1731 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1732 getOrCreateThreadPrivateCache(VD)}; 1733 return Address( 1734 CGF.EmitRuntimeCall( 1735 OMPBuilder.getOrCreateRuntimeFunction( 1736 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1737 Args), 1738 CGF.Int8Ty, VDAddr.getAlignment()); 1739 } 1740 1741 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1742 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1743 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1744 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1745 // library. 1746 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1747 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1748 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1749 OMPLoc); 1750 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1751 // to register constructor/destructor for variable. 
1752 llvm::Value *Args[] = { 1753 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1754 Ctor, CopyCtor, Dtor}; 1755 CGF.EmitRuntimeCall( 1756 OMPBuilder.getOrCreateRuntimeFunction( 1757 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1758 Args); 1759 } 1760 1761 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1762 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1763 bool PerformInit, CodeGenFunction *CGF) { 1764 if (CGM.getLangOpts().OpenMPUseTLS && 1765 CGM.getContext().getTargetInfo().isTLSSupported()) 1766 return nullptr; 1767 1768 VD = VD->getDefinition(CGM.getContext()); 1769 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1770 QualType ASTTy = VD->getType(); 1771 1772 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1773 const Expr *Init = VD->getAnyInitializer(); 1774 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1775 // Generate function that re-emits the declaration's initializer into the 1776 // threadprivate copy of the variable VD 1777 CodeGenFunction CtorCGF(CGM); 1778 FunctionArgList Args; 1779 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1780 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1781 ImplicitParamDecl::Other); 1782 Args.push_back(&Dst); 1783 1784 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1785 CGM.getContext().VoidPtrTy, Args); 1786 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1787 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1788 llvm::Function *Fn = 1789 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1790 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1791 Args, Loc, Loc); 1792 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1793 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1794 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1795 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); 1796 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1797 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1798 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1799 /*IsInitializer=*/true); 1800 ArgVal = CtorCGF.EmitLoadOfScalar( 1801 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1802 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1803 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1804 CtorCGF.FinishFunction(); 1805 Ctor = Fn; 1806 } 1807 if (VD->getType().isDestructedType() != QualType::DK_none) { 1808 // Generate function that emits destructor call for the threadprivate copy 1809 // of the variable VD 1810 CodeGenFunction DtorCGF(CGM); 1811 FunctionArgList Args; 1812 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1813 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1814 ImplicitParamDecl::Other); 1815 Args.push_back(&Dst); 1816 1817 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1818 CGM.getContext().VoidTy, Args); 1819 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1820 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1821 llvm::Function *Fn = 1822 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1823 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1824 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1825 Loc, Loc); 1826 // Create a scope with an artificial location for the body of this function. 1827 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1828 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1829 DtorCGF.GetAddrOfLocalVar(&Dst), 1830 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1831 DtorCGF.emitDestroy( 1832 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, 1833 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1834 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1835 DtorCGF.FinishFunction(); 1836 Dtor = Fn; 1837 } 1838 // Do not emit init function if it is not required. 
1839 if (!Ctor && !Dtor) 1840 return nullptr; 1841 1842 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1843 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1844 /*isVarArg=*/false) 1845 ->getPointerTo(); 1846 // Copying constructor for the threadprivate variable. 1847 // Must be NULL - reserved by runtime, but currently it requires that this 1848 // parameter is always NULL. Otherwise it fires assertion. 1849 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1850 if (Ctor == nullptr) { 1851 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1852 /*isVarArg=*/false) 1853 ->getPointerTo(); 1854 Ctor = llvm::Constant::getNullValue(CtorTy); 1855 } 1856 if (Dtor == nullptr) { 1857 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1858 /*isVarArg=*/false) 1859 ->getPointerTo(); 1860 Dtor = llvm::Constant::getNullValue(DtorTy); 1861 } 1862 if (!CGF) { 1863 auto *InitFunctionTy = 1864 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1865 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1866 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1867 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1868 CodeGenFunction InitCGF(CGM); 1869 FunctionArgList ArgList; 1870 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1871 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1872 Loc, Loc); 1873 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1874 InitCGF.FinishFunction(); 1875 return InitFunction; 1876 } 1877 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 } 1879 return nullptr; 1880 } 1881 1882 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1883 llvm::GlobalVariable *Addr, 1884 bool PerformInit) { 1885 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1886 !CGM.getLangOpts().OpenMPIsDevice) 1887 return false; 1888 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1889 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1890 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1891 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1892 HasRequiresUnifiedSharedMemory)) 1893 return CGM.getLangOpts().OpenMPIsDevice; 1894 VD = VD->getDefinition(CGM.getContext()); 1895 assert(VD && "Unknown VarDecl"); 1896 1897 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1898 return CGM.getLangOpts().OpenMPIsDevice; 1899 1900 QualType ASTTy = VD->getType(); 1901 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1902 1903 // Produce the unique prefix to identify the new target regions. We use 1904 // the source location of the variable declaration which we know to not 1905 // conflict with any target region. 1906 unsigned DeviceID; 1907 unsigned FileID; 1908 unsigned Line; 1909 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1910 SmallString<128> Buffer, Out; 1911 { 1912 llvm::raw_svector_ostream OS(Buffer); 1913 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1914 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1915 } 1916 1917 const Expr *Init = VD->getAnyInitializer(); 1918 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1919 llvm::Constant *Ctor; 1920 llvm::Constant *ID; 1921 if (CGM.getLangOpts().OpenMPIsDevice) { 1922 // Generate function that re-emits the declaration's initializer into 1923 // the threadprivate copy of the variable VD 1924 CodeGenFunction CtorCGF(CGM); 1925 1926 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1927 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1928 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1929 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1930 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1931 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1932 FunctionArgList(), Loc, Loc); 1933 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1934 llvm::Constant *AddrInAS0 = Addr; 1935 if (Addr->getAddressSpace() != 0) 1936 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1937 Addr, llvm::PointerType::getWithSamePointeeType( 1938 cast<llvm::PointerType>(Addr->getType()), 0)); 1939 CtorCGF.EmitAnyExprToMem(Init, 1940 Address(AddrInAS0, Addr->getValueType(), 1941 CGM.getContext().getDeclAlign(VD)), 1942 Init->getType().getQualifiers(), 1943 /*IsInitializer=*/true); 1944 CtorCGF.FinishFunction(); 1945 Ctor = Fn; 1946 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1947 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1948 } else { 1949 Ctor = new llvm::GlobalVariable( 1950 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1951 llvm::GlobalValue::PrivateLinkage, 1952 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1953 ID = Ctor; 1954 } 1955 1956 // Register the information for the entry associated with the constructor. 1957 Out.clear(); 1958 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1959 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1960 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1961 } 1962 if (VD->getType().isDestructedType() != QualType::DK_none) { 1963 llvm::Constant *Dtor; 1964 llvm::Constant *ID; 1965 if (CGM.getLangOpts().OpenMPIsDevice) { 1966 // Generate function that emits destructor call for the threadprivate 1967 // copy of the variable VD 1968 CodeGenFunction DtorCGF(CGM); 1969 1970 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1971 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1972 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1973 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1974 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1975 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1976 FunctionArgList(), Loc, Loc); 1977 // Create a scope with an artificial location for the body of this 1978 
// function. 1979 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1980 llvm::Constant *AddrInAS0 = Addr; 1981 if (Addr->getAddressSpace() != 0) 1982 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( 1983 Addr, llvm::PointerType::getWithSamePointeeType( 1984 cast<llvm::PointerType>(Addr->getType()), 0)); 1985 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), 1986 CGM.getContext().getDeclAlign(VD)), 1987 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1988 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1989 DtorCGF.FinishFunction(); 1990 Dtor = Fn; 1991 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1992 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1993 } else { 1994 Dtor = new llvm::GlobalVariable( 1995 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1996 llvm::GlobalValue::PrivateLinkage, 1997 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1998 ID = Dtor; 1999 } 2000 // Register the information for the entry associated with the destructor. 
2001 Out.clear(); 2002 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2003 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2004 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2005 } 2006 return CGM.getLangOpts().OpenMPIsDevice; 2007 } 2008 2009 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2010 QualType VarType, 2011 StringRef Name) { 2012 std::string Suffix = getName({"artificial", ""}); 2013 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2014 llvm::GlobalVariable *GAddr = 2015 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2016 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2017 CGM.getTarget().isTLSSupported()) { 2018 GAddr->setThreadLocal(/*Val=*/true); 2019 return Address(GAddr, GAddr->getValueType(), 2020 CGM.getContext().getTypeAlignInChars(VarType)); 2021 } 2022 std::string CacheSuffix = getName({"cache", ""}); 2023 llvm::Value *Args[] = { 2024 emitUpdateLocation(CGF, SourceLocation()), 2025 getThreadID(CGF, SourceLocation()), 2026 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2027 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2028 /*isSigned=*/false), 2029 getOrCreateInternalVariable( 2030 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2031 return Address( 2032 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2033 CGF.EmitRuntimeCall( 2034 OMPBuilder.getOrCreateRuntimeFunction( 2035 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2036 Args), 2037 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2038 VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); 2039 } 2040 2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2042 const RegionCodeGenTy &ThenGen, 2043 const RegionCodeGenTy &ElseGen) { 2044 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2045 2046 // If the condition constant folds and can be elided, try 
to avoid emitting 2047 // the condition and the dead arm of the if/else. 2048 bool CondConstant; 2049 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2050 if (CondConstant) 2051 ThenGen(CGF); 2052 else 2053 ElseGen(CGF); 2054 return; 2055 } 2056 2057 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2058 // emit the conditional branch. 2059 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2060 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2061 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2062 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2063 2064 // Emit the 'then' code. 2065 CGF.EmitBlock(ThenBlock); 2066 ThenGen(CGF); 2067 CGF.EmitBranch(ContBlock); 2068 // Emit the 'else' code if present. 2069 // There is no need to emit line number for unconditional branch. 2070 (void)ApplyDebugLocation::CreateEmpty(CGF); 2071 CGF.EmitBlock(ElseBlock); 2072 ElseGen(CGF); 2073 // There is no need to emit line number for unconditional branch. 2074 (void)ApplyDebugLocation::CreateEmpty(CGF); 2075 CGF.EmitBranch(ContBlock); 2076 // Emit the continuation block for code after the if. 
2077 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2078 } 2079 2080 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2081 llvm::Function *OutlinedFn, 2082 ArrayRef<llvm::Value *> CapturedVars, 2083 const Expr *IfCond, 2084 llvm::Value *NumThreads) { 2085 if (!CGF.HaveInsertPoint()) 2086 return; 2087 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2088 auto &M = CGM.getModule(); 2089 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2090 this](CodeGenFunction &CGF, PrePostActionTy &) { 2091 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2092 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2093 llvm::Value *Args[] = { 2094 RTLoc, 2095 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2096 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2097 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2098 RealArgs.append(std::begin(Args), std::end(Args)); 2099 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2100 2101 llvm::FunctionCallee RTLFn = 2102 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2103 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2104 }; 2105 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2106 this](CodeGenFunction &CGF, PrePostActionTy &) { 2107 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2108 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2109 // Build calls: 2110 // __kmpc_serialized_parallel(&Loc, GTid); 2111 llvm::Value *Args[] = {RTLoc, ThreadID}; 2112 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2113 M, OMPRTL___kmpc_serialized_parallel), 2114 Args); 2115 2116 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2117 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2118 Address ZeroAddrBound = 2119 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2120 /*Name=*/".bound.zero.addr"); 2121 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); 2122 llvm::SmallVector<llvm::Value *, 16> 
OutlinedFnArgs; 2123 // ThreadId for serialized parallels is 0. 2124 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2125 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2126 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2127 2128 // Ensure we do not inline the function. This is trivially true for the ones 2129 // passed to __kmpc_fork_call but the ones called in serialized regions 2130 // could be inlined. This is not a perfect but it is closer to the invariant 2131 // we want, namely, every data environment starts with a new function. 2132 // TODO: We should pass the if condition to the runtime function and do the 2133 // handling there. Much cleaner code. 2134 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); 2135 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2136 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2137 2138 // __kmpc_end_serialized_parallel(&Loc, GTid); 2139 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2140 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2141 M, OMPRTL___kmpc_end_serialized_parallel), 2142 EndArgs); 2143 }; 2144 if (IfCond) { 2145 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2146 } else { 2147 RegionCodeGenTy ThenRCG(ThenGen); 2148 ThenRCG(CGF); 2149 } 2150 } 2151 2152 // If we're inside an (outlined) parallel region, use the region info's 2153 // thread-ID variable (it is passed in a first argument of the outlined function 2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2155 // regular serial code region, get thread ID by calling kmp_int32 2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2157 // return the address of that temp. 
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2159 SourceLocation Loc) { 2160 if (auto *OMPRegionInfo = 2161 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2162 if (OMPRegionInfo->getThreadIDVariable()) 2163 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2164 2165 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2166 QualType Int32Ty = 2167 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2168 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2169 CGF.EmitStoreOfScalar(ThreadID, 2170 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2171 2172 return ThreadIDTemp; 2173 } 2174 2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( 2176 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2177 SmallString<256> Buffer; 2178 llvm::raw_svector_ostream Out(Buffer); 2179 Out << Name; 2180 StringRef RuntimeName = Out.str(); 2181 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2182 if (Elem.second) { 2183 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && 2184 "OMP internal variable has different type than requested"); 2185 return &*Elem.second; 2186 } 2187 2188 return Elem.second = new llvm::GlobalVariable( 2189 CGM.getModule(), Ty, /*IsConstant*/ false, 2190 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2191 Elem.first(), /*InsertBefore=*/nullptr, 2192 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2193 } 2194 2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2196 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2197 std::string Name = getName({Prefix, "var"}); 2198 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2199 } 2200 2201 namespace { 2202 /// Common pre(post)-action for different OpenMP constructs. 
2203 class CommonActionTy final : public PrePostActionTy { 2204 llvm::FunctionCallee EnterCallee; 2205 ArrayRef<llvm::Value *> EnterArgs; 2206 llvm::FunctionCallee ExitCallee; 2207 ArrayRef<llvm::Value *> ExitArgs; 2208 bool Conditional; 2209 llvm::BasicBlock *ContBlock = nullptr; 2210 2211 public: 2212 CommonActionTy(llvm::FunctionCallee EnterCallee, 2213 ArrayRef<llvm::Value *> EnterArgs, 2214 llvm::FunctionCallee ExitCallee, 2215 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2216 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2217 ExitArgs(ExitArgs), Conditional(Conditional) {} 2218 void Enter(CodeGenFunction &CGF) override { 2219 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2220 if (Conditional) { 2221 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2222 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2223 ContBlock = CGF.createBasicBlock("omp_if.end"); 2224 // Generate the branch (If-stmt) 2225 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2226 CGF.EmitBlock(ThenBlock); 2227 } 2228 } 2229 void Done(CodeGenFunction &CGF) { 2230 // Emit the rest of blocks/branches 2231 CGF.EmitBranch(ContBlock); 2232 CGF.EmitBlock(ContBlock, true); 2233 } 2234 void Exit(CodeGenFunction &CGF) override { 2235 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2236 } 2237 }; 2238 } // anonymous namespace 2239 2240 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2241 StringRef CriticalName, 2242 const RegionCodeGenTy &CriticalOpGen, 2243 SourceLocation Loc, const Expr *Hint) { 2244 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2245 // CriticalOpGen(); 2246 // __kmpc_end_critical(ident_t *, gtid, Lock); 2247 // Prepare arguments and build a call to __kmpc_critical 2248 if (!CGF.HaveInsertPoint()) 2249 return; 2250 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2251 getCriticalRegionLock(CriticalName)}; 2252 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2253 std::end(Args)); 2254 if (Hint) { 2255 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2256 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2257 } 2258 CommonActionTy Action( 2259 OMPBuilder.getOrCreateRuntimeFunction( 2260 CGM.getModule(), 2261 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2262 EnterArgs, 2263 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2264 OMPRTL___kmpc_end_critical), 2265 Args); 2266 CriticalOpGen.setAction(Action); 2267 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2268 } 2269 2270 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2271 const RegionCodeGenTy &MasterOpGen, 2272 SourceLocation Loc) { 2273 if (!CGF.HaveInsertPoint()) 2274 return; 2275 // if(__kmpc_master(ident_t *, gtid)) { 2276 // MasterOpGen(); 2277 // __kmpc_end_master(ident_t *, gtid); 2278 // } 2279 // Prepare arguments and build a call to __kmpc_master 2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2281 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2282 CGM.getModule(), OMPRTL___kmpc_master), 2283 Args, 2284 OMPBuilder.getOrCreateRuntimeFunction( 2285 CGM.getModule(), OMPRTL___kmpc_end_master), 2286 Args, 2287 /*Conditional=*/true); 2288 MasterOpGen.setAction(Action); 2289 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2290 Action.Done(CGF); 2291 } 2292 2293 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2294 const RegionCodeGenTy &MaskedOpGen, 2295 SourceLocation Loc, const Expr *Filter) { 2296 if (!CGF.HaveInsertPoint()) 2297 return; 2298 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2299 // MaskedOpGen(); 2300 // __kmpc_end_masked(iden_t *, gtid); 2301 // } 2302 // Prepare arguments and build a call to __kmpc_masked 2303 llvm::Value *FilterVal = Filter 2304 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2305 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2307 FilterVal}; 2308 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2309 getThreadID(CGF, Loc)}; 2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2311 CGM.getModule(), OMPRTL___kmpc_masked), 2312 Args, 2313 OMPBuilder.getOrCreateRuntimeFunction( 2314 CGM.getModule(), OMPRTL___kmpc_end_masked), 2315 ArgsEnd, 2316 /*Conditional=*/true); 2317 MaskedOpGen.setAction(Action); 2318 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2319 Action.Done(CGF); 2320 } 2321 2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2323 SourceLocation Loc) { 2324 if (!CGF.HaveInsertPoint()) 2325 return; 2326 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2327 OMPBuilder.createTaskyield(CGF.Builder); 2328 } else { 2329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2330 llvm::Value *Args[] = { 2331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2333 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2334 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2335 Args); 2336 } 2337 2338 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2339 Region->emitUntiedSwitch(CGF); 2340 } 2341 2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2343 const RegionCodeGenTy &TaskgroupOpGen, 2344 SourceLocation Loc) { 2345 if (!CGF.HaveInsertPoint()) 2346 return; 2347 // __kmpc_taskgroup(ident_t *, gtid); 2348 // TaskgroupOpGen(); 2349 // __kmpc_end_taskgroup(ident_t *, gtid); 2350 // Prepare arguments and build a call to __kmpc_taskgroup 2351 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2352 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2353 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2354 Args, 2355 
OMPBuilder.getOrCreateRuntimeFunction( 2356 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2357 Args); 2358 TaskgroupOpGen.setAction(Action); 2359 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2360 } 2361 2362 /// Given an array of pointers to variables, project the address of a 2363 /// given variable. 2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2365 unsigned Index, const VarDecl *Var) { 2366 // Pull out the pointer to the variable. 2367 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2368 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2369 2370 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); 2371 return Address( 2372 CGF.Builder.CreateBitCast( 2373 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), 2374 ElemTy, CGF.getContext().getDeclAlign(Var)); 2375 } 2376 2377 static llvm::Value *emitCopyprivateCopyFunction( 2378 CodeGenModule &CGM, llvm::Type *ArgsElemType, 2379 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2380 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2381 SourceLocation Loc) { 2382 ASTContext &C = CGM.getContext(); 2383 // void copy_func(void *LHSArg, void *RHSArg); 2384 FunctionArgList Args; 2385 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2386 ImplicitParamDecl::Other); 2387 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2388 ImplicitParamDecl::Other); 2389 Args.push_back(&LHSArg); 2390 Args.push_back(&RHSArg); 2391 const auto &CGFI = 2392 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2393 std::string Name = 2394 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2395 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2396 llvm::GlobalValue::InternalLinkage, Name, 2397 &CGM.getModule()); 2398 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2399 
Fn->setDoesNotRecurse(); 2400 CodeGenFunction CGF(CGM); 2401 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2402 // Dest = (void*[n])(LHSArg); 2403 // Src = (void*[n])(RHSArg); 2404 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2405 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2406 ArgsElemType->getPointerTo()), 2407 ArgsElemType, CGF.getPointerAlign()); 2408 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2410 ArgsElemType->getPointerTo()), 2411 ArgsElemType, CGF.getPointerAlign()); 2412 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2413 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2414 // ... 2415 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2416 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2417 const auto *DestVar = 2418 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2419 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2420 2421 const auto *SrcVar = 2422 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2423 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2424 2425 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2426 QualType Type = VD->getType(); 2427 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2428 } 2429 CGF.FinishFunction(); 2430 return Fn; 2431 } 2432 2433 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2434 const RegionCodeGenTy &SingleOpGen, 2435 SourceLocation Loc, 2436 ArrayRef<const Expr *> CopyprivateVars, 2437 ArrayRef<const Expr *> SrcExprs, 2438 ArrayRef<const Expr *> DstExprs, 2439 ArrayRef<const Expr *> AssignmentOps) { 2440 if (!CGF.HaveInsertPoint()) 2441 return; 2442 assert(CopyprivateVars.size() == SrcExprs.size() && 2443 CopyprivateVars.size() == DstExprs.size() && 2444 CopyprivateVars.size() == AssignmentOps.size()); 2445 ASTContext &C = CGM.getContext(); 2446 // int32 did_it = 0; 2447 // 
if(__kmpc_single(ident_t *, gtid)) { 2448 // SingleOpGen(); 2449 // __kmpc_end_single(ident_t *, gtid); 2450 // did_it = 1; 2451 // } 2452 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2453 // <copy_func>, did_it); 2454 2455 Address DidIt = Address::invalid(); 2456 if (!CopyprivateVars.empty()) { 2457 // int32 did_it = 0; 2458 QualType KmpInt32Ty = 2459 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2460 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2461 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2462 } 2463 // Prepare arguments and build a call to __kmpc_single 2464 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2465 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2466 CGM.getModule(), OMPRTL___kmpc_single), 2467 Args, 2468 OMPBuilder.getOrCreateRuntimeFunction( 2469 CGM.getModule(), OMPRTL___kmpc_end_single), 2470 Args, 2471 /*Conditional=*/true); 2472 SingleOpGen.setAction(Action); 2473 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2474 if (DidIt.isValid()) { 2475 // did_it = 1; 2476 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2477 } 2478 Action.Done(CGF); 2479 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2480 // <copy_func>, did_it); 2481 if (DidIt.isValid()) { 2482 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2483 QualType CopyprivateArrayTy = C.getConstantArrayType( 2484 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2485 /*IndexTypeQuals=*/0); 2486 // Create a list of all private variables for copyprivate. 
2487 Address CopyprivateList = 2488 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2489 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2490 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2491 CGF.Builder.CreateStore( 2492 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2493 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2494 CGF.VoidPtrTy), 2495 Elem); 2496 } 2497 // Build function that copies private values from single region to all other 2498 // threads in the corresponding parallel region. 2499 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2500 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, 2501 SrcExprs, DstExprs, AssignmentOps, Loc); 2502 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2503 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2504 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); 2505 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2506 llvm::Value *Args[] = { 2507 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2508 getThreadID(CGF, Loc), // i32 <gtid> 2509 BufSize, // size_t <buf_size> 2510 CL.getPointer(), // void *<copyprivate list> 2511 CpyFn, // void (*) (void *, void *) <copy_func> 2512 DidItVal // i32 did_it 2513 }; 2514 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2515 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2516 Args); 2517 } 2518 } 2519 2520 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2521 const RegionCodeGenTy &OrderedOpGen, 2522 SourceLocation Loc, bool IsThreads) { 2523 if (!CGF.HaveInsertPoint()) 2524 return; 2525 // __kmpc_ordered(ident_t *, gtid); 2526 // OrderedOpGen(); 2527 // __kmpc_end_ordered(ident_t *, gtid); 2528 // Prepare arguments and build a call to __kmpc_ordered 2529 if (IsThreads) { 2530 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2531 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2532 CGM.getModule(), 
OMPRTL___kmpc_ordered), 2533 Args, 2534 OMPBuilder.getOrCreateRuntimeFunction( 2535 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2536 Args); 2537 OrderedOpGen.setAction(Action); 2538 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2539 return; 2540 } 2541 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2542 } 2543 2544 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2545 unsigned Flags; 2546 if (Kind == OMPD_for) 2547 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2548 else if (Kind == OMPD_sections) 2549 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2550 else if (Kind == OMPD_single) 2551 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2552 else if (Kind == OMPD_barrier) 2553 Flags = OMP_IDENT_BARRIER_EXPL; 2554 else 2555 Flags = OMP_IDENT_BARRIER_IMPL; 2556 return Flags; 2557 } 2558 2559 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2560 CodeGenFunction &CGF, const OMPLoopDirective &S, 2561 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2562 // Check if the loop directive is actually a doacross loop directive. In this 2563 // case choose static, 1 schedule. 2564 if (llvm::any_of( 2565 S.getClausesOfKind<OMPOrderedClause>(), 2566 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2567 ScheduleKind = OMPC_SCHEDULE_static; 2568 // Chunk size is 1 in this case. 
2569 llvm::APInt ChunkSize(32, 1); 2570 ChunkExpr = IntegerLiteral::Create( 2571 CGF.getContext(), ChunkSize, 2572 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2573 SourceLocation()); 2574 } 2575 } 2576 2577 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2578 OpenMPDirectiveKind Kind, bool EmitChecks, 2579 bool ForceSimpleCall) { 2580 // Check if we should use the OMPBuilder 2581 auto *OMPRegionInfo = 2582 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2583 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2584 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2585 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2586 return; 2587 } 2588 2589 if (!CGF.HaveInsertPoint()) 2590 return; 2591 // Build call __kmpc_cancel_barrier(loc, thread_id); 2592 // Build call __kmpc_barrier(loc, thread_id); 2593 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2594 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2595 // thread_id); 2596 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2597 getThreadID(CGF, Loc)}; 2598 if (OMPRegionInfo) { 2599 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2600 llvm::Value *Result = CGF.EmitRuntimeCall( 2601 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2602 OMPRTL___kmpc_cancel_barrier), 2603 Args); 2604 if (EmitChecks) { 2605 // if (__kmpc_cancel_barrier()) { 2606 // exit from construct; 2607 // } 2608 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2609 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2610 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2611 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2612 CGF.EmitBlock(ExitBB); 2613 // exit from construct; 2614 CodeGenFunction::JumpDest CancelDestination = 2615 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2616 CGF.EmitBranchThroughCleanup(CancelDestination); 2617 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2618 } 2619 
return; 2620 } 2621 } 2622 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2623 CGM.getModule(), OMPRTL___kmpc_barrier), 2624 Args); 2625 } 2626 2627 /// Map the OpenMP loop schedule to the runtime enumeration. 2628 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2629 bool Chunked, bool Ordered) { 2630 switch (ScheduleKind) { 2631 case OMPC_SCHEDULE_static: 2632 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2633 : (Ordered ? OMP_ord_static : OMP_sch_static); 2634 case OMPC_SCHEDULE_dynamic: 2635 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2636 case OMPC_SCHEDULE_guided: 2637 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2638 case OMPC_SCHEDULE_runtime: 2639 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2640 case OMPC_SCHEDULE_auto: 2641 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2642 case OMPC_SCHEDULE_unknown: 2643 assert(!Chunked && "chunk was specified but schedule kind not known"); 2644 return Ordered ? OMP_ord_static : OMP_sch_static; 2645 } 2646 llvm_unreachable("Unexpected runtime schedule"); 2647 } 2648 2649 /// Map the OpenMP distribute schedule to the runtime enumeration. 2650 static OpenMPSchedType 2651 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2652 // only static is allowed for dist_schedule 2653 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2654 } 2655 2656 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2657 bool Chunked) const { 2658 OpenMPSchedType Schedule = 2659 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2660 return Schedule == OMP_sch_static; 2661 } 2662 2663 bool CGOpenMPRuntime::isStaticNonchunked( 2664 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2665 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2666 return Schedule == OMP_dist_sch_static; 2667 } 2668 2669 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2670 bool Chunked) const { 2671 OpenMPSchedType Schedule = 2672 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2673 return Schedule == OMP_sch_static_chunked; 2674 } 2675 2676 bool CGOpenMPRuntime::isStaticChunked( 2677 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2678 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2679 return Schedule == OMP_dist_sch_static_chunked; 2680 } 2681 2682 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2683 OpenMPSchedType Schedule = 2684 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2685 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2686 return Schedule != OMP_sch_static; 2687 } 2688 2689 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2690 OpenMPScheduleClauseModifier M1, 2691 OpenMPScheduleClauseModifier M2) { 2692 int Modifier = 0; 2693 switch (M1) { 2694 case OMPC_SCHEDULE_MODIFIER_monotonic: 2695 Modifier = OMP_sch_modifier_monotonic; 2696 break; 2697 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2698 Modifier = OMP_sch_modifier_nonmonotonic; 2699 break; 2700 case OMPC_SCHEDULE_MODIFIER_simd: 2701 if (Schedule == OMP_sch_static_chunked) 2702 Schedule = OMP_sch_static_balanced_chunked; 2703 break; 2704 case 
OMPC_SCHEDULE_MODIFIER_last: 2705 case OMPC_SCHEDULE_MODIFIER_unknown: 2706 break; 2707 } 2708 switch (M2) { 2709 case OMPC_SCHEDULE_MODIFIER_monotonic: 2710 Modifier = OMP_sch_modifier_monotonic; 2711 break; 2712 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2713 Modifier = OMP_sch_modifier_nonmonotonic; 2714 break; 2715 case OMPC_SCHEDULE_MODIFIER_simd: 2716 if (Schedule == OMP_sch_static_chunked) 2717 Schedule = OMP_sch_static_balanced_chunked; 2718 break; 2719 case OMPC_SCHEDULE_MODIFIER_last: 2720 case OMPC_SCHEDULE_MODIFIER_unknown: 2721 break; 2722 } 2723 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2724 // If the static schedule kind is specified or if the ordered clause is 2725 // specified, and if the nonmonotonic modifier is not specified, the effect is 2726 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2727 // modifier is specified, the effect is as if the nonmonotonic modifier is 2728 // specified. 2729 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2730 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2731 Schedule == OMP_sch_static_balanced_chunked || 2732 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2733 Schedule == OMP_dist_sch_static_chunked || 2734 Schedule == OMP_dist_sch_static)) 2735 Modifier = OMP_sch_modifier_nonmonotonic; 2736 } 2737 return Schedule | Modifier; 2738 } 2739 2740 void CGOpenMPRuntime::emitForDispatchInit( 2741 CodeGenFunction &CGF, SourceLocation Loc, 2742 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2743 bool Ordered, const DispatchRTInput &DispatchValues) { 2744 if (!CGF.HaveInsertPoint()) 2745 return; 2746 OpenMPSchedType Schedule = getRuntimeSchedule( 2747 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2748 assert(Ordered || 2749 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2750 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2751 
Schedule != OMP_sch_static_balanced_chunked)); 2752 // Call __kmpc_dispatch_init( 2753 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2754 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2755 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2756 2757 // If the Chunk was not specified in the clause - use default value 1. 2758 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2759 : CGF.Builder.getIntN(IVSize, 1); 2760 llvm::Value *Args[] = { 2761 emitUpdateLocation(CGF, Loc), 2762 getThreadID(CGF, Loc), 2763 CGF.Builder.getInt32(addMonoNonMonoModifier( 2764 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2765 DispatchValues.LB, // Lower 2766 DispatchValues.UB, // Upper 2767 CGF.Builder.getIntN(IVSize, 1), // Stride 2768 Chunk // Chunk 2769 }; 2770 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2771 } 2772 2773 static void emitForStaticInitCall( 2774 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2775 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2776 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2777 const CGOpenMPRuntime::StaticRTInput &Values) { 2778 if (!CGF.HaveInsertPoint()) 2779 return; 2780 2781 assert(!Values.Ordered); 2782 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2783 Schedule == OMP_sch_static_balanced_chunked || 2784 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2785 Schedule == OMP_dist_sch_static || 2786 Schedule == OMP_dist_sch_static_chunked); 2787 2788 // Call __kmpc_for_static_init( 2789 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2790 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2791 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2792 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2793 llvm::Value *Chunk = Values.Chunk; 2794 if (Chunk == nullptr) { 2795 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2796 Schedule == 
OMP_dist_sch_static) && 2797 "expected static non-chunked schedule"); 2798 // If the Chunk was not specified in the clause - use default value 1. 2799 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2800 } else { 2801 assert((Schedule == OMP_sch_static_chunked || 2802 Schedule == OMP_sch_static_balanced_chunked || 2803 Schedule == OMP_ord_static_chunked || 2804 Schedule == OMP_dist_sch_static_chunked) && 2805 "expected static chunked schedule"); 2806 } 2807 llvm::Value *Args[] = { 2808 UpdateLocation, 2809 ThreadId, 2810 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2811 M2)), // Schedule type 2812 Values.IL.getPointer(), // &isLastIter 2813 Values.LB.getPointer(), // &LB 2814 Values.UB.getPointer(), // &UB 2815 Values.ST.getPointer(), // &Stride 2816 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2817 Chunk // Chunk 2818 }; 2819 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2820 } 2821 2822 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2823 SourceLocation Loc, 2824 OpenMPDirectiveKind DKind, 2825 const OpenMPScheduleTy &ScheduleKind, 2826 const StaticRTInput &Values) { 2827 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2828 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2829 assert(isOpenMPWorksharingDirective(DKind) && 2830 "Expected loop-based or sections-based directive."); 2831 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2832 isOpenMPLoopDirective(DKind) 2833 ? 
OMP_IDENT_WORK_LOOP 2834 : OMP_IDENT_WORK_SECTIONS); 2835 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2836 llvm::FunctionCallee StaticInitFunction = 2837 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2838 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2839 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2840 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2841 } 2842 2843 void CGOpenMPRuntime::emitDistributeStaticInit( 2844 CodeGenFunction &CGF, SourceLocation Loc, 2845 OpenMPDistScheduleClauseKind SchedKind, 2846 const CGOpenMPRuntime::StaticRTInput &Values) { 2847 OpenMPSchedType ScheduleNum = 2848 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2849 llvm::Value *UpdatedLocation = 2850 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2851 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2852 llvm::FunctionCallee StaticInitFunction; 2853 bool isGPUDistribute = 2854 CGM.getLangOpts().OpenMPIsDevice && 2855 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2856 StaticInitFunction = createForStaticInitFunction( 2857 Values.IVSize, Values.IVSigned, isGPUDistribute); 2858 2859 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2860 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2861 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2862 } 2863 2864 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2865 SourceLocation Loc, 2866 OpenMPDirectiveKind DKind) { 2867 if (!CGF.HaveInsertPoint()) 2868 return; 2869 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2870 llvm::Value *Args[] = { 2871 emitUpdateLocation(CGF, Loc, 2872 isOpenMPDistributeDirective(DKind) 2873 ? OMP_IDENT_WORK_DISTRIBUTE 2874 : isOpenMPLoopDirective(DKind) 2875 ? 
OMP_IDENT_WORK_LOOP 2876 : OMP_IDENT_WORK_SECTIONS), 2877 getThreadID(CGF, Loc)}; 2878 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2879 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2880 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2881 CGF.EmitRuntimeCall( 2882 OMPBuilder.getOrCreateRuntimeFunction( 2883 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2884 Args); 2885 else 2886 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2887 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2888 Args); 2889 } 2890 2891 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2892 SourceLocation Loc, 2893 unsigned IVSize, 2894 bool IVSigned) { 2895 if (!CGF.HaveInsertPoint()) 2896 return; 2897 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2898 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2899 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2900 } 2901 2902 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2903 SourceLocation Loc, unsigned IVSize, 2904 bool IVSigned, Address IL, 2905 Address LB, Address UB, 2906 Address ST) { 2907 // Call __kmpc_dispatch_next( 2908 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2909 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2910 // kmp_int[32|64] *p_stride); 2911 llvm::Value *Args[] = { 2912 emitUpdateLocation(CGF, Loc), 2913 getThreadID(CGF, Loc), 2914 IL.getPointer(), // &isLastIter 2915 LB.getPointer(), // &Lower 2916 UB.getPointer(), // &Upper 2917 ST.getPointer() // &Stride 2918 }; 2919 llvm::Value *Call = 2920 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2921 return CGF.EmitScalarConversion( 2922 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2923 CGF.getContext().BoolTy, Loc); 2924 } 2925 2926 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2927 llvm::Value 
*NumThreads, 2928 SourceLocation Loc) { 2929 if (!CGF.HaveInsertPoint()) 2930 return; 2931 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2932 llvm::Value *Args[] = { 2933 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2934 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2935 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2936 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2937 Args); 2938 } 2939 2940 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2941 ProcBindKind ProcBind, 2942 SourceLocation Loc) { 2943 if (!CGF.HaveInsertPoint()) 2944 return; 2945 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2946 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2947 llvm::Value *Args[] = { 2948 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2949 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2950 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2951 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2952 Args); 2953 } 2954 2955 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2956 SourceLocation Loc, llvm::AtomicOrdering AO) { 2957 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2958 OMPBuilder.createFlush(CGF.Builder); 2959 } else { 2960 if (!CGF.HaveInsertPoint()) 2961 return; 2962 // Build call void __kmpc_flush(ident_t *loc) 2963 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2964 CGM.getModule(), OMPRTL___kmpc_flush), 2965 emitUpdateLocation(CGF, Loc)); 2966 } 2967 } 2968 2969 namespace { 2970 /// Indexes of fields for type kmp_task_t. 2971 enum KmpTaskTFields { 2972 /// List of shared variables. 2973 KmpTaskTShareds, 2974 /// Task routine. 2975 KmpTaskTRoutine, 2976 /// Partition id for the untied tasks. 2977 KmpTaskTPartId, 2978 /// Function with call of destructors for private variables. 2979 Data1, 2980 /// Task priority. 
2981 Data2, 2982 /// (Taskloops only) Lower bound. 2983 KmpTaskTLowerBound, 2984 /// (Taskloops only) Upper bound. 2985 KmpTaskTUpperBound, 2986 /// (Taskloops only) Stride. 2987 KmpTaskTStride, 2988 /// (Taskloops only) Is last iteration flag. 2989 KmpTaskTLastIter, 2990 /// (Taskloops only) Reduction data. 2991 KmpTaskTReductions, 2992 }; 2993 } // anonymous namespace 2994 2995 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2996 return OffloadEntriesTargetRegion.empty() && 2997 OffloadEntriesDeviceGlobalVar.empty(); 2998 } 2999 3000 /// Initialize target region entry. 3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3002 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3003 StringRef ParentName, unsigned LineNum, 3004 unsigned Order) { 3005 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3006 "only required for the device " 3007 "code generation."); 3008 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3009 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3010 OMPTargetRegionEntryTargetRegion); 3011 ++OffloadingEntriesNum; 3012 } 3013 3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3015 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3016 StringRef ParentName, unsigned LineNum, 3017 llvm::Constant *Addr, llvm::Constant *ID, 3018 OMPTargetRegionEntryKind Flags) { 3019 // If we are emitting code for a target, the entry is already initialized, 3020 // only has to be registered. 3021 if (CGM.getLangOpts().OpenMPIsDevice) { 3022 // This could happen if the device compilation is invoked standalone. 
3023 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3024 return; 3025 auto &Entry = 3026 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3027 Entry.setAddress(Addr); 3028 Entry.setID(ID); 3029 Entry.setFlags(Flags); 3030 } else { 3031 if (Flags == 3032 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3033 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3034 /*IgnoreAddressId*/ true)) 3035 return; 3036 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3037 "Target region entry already registered!"); 3038 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3039 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3040 ++OffloadingEntriesNum; 3041 } 3042 } 3043 3044 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3045 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3046 bool IgnoreAddressId) const { 3047 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3048 if (PerDevice == OffloadEntriesTargetRegion.end()) 3049 return false; 3050 auto PerFile = PerDevice->second.find(FileID); 3051 if (PerFile == PerDevice->second.end()) 3052 return false; 3053 auto PerParentName = PerFile->second.find(ParentName); 3054 if (PerParentName == PerFile->second.end()) 3055 return false; 3056 auto PerLine = PerParentName->second.find(LineNum); 3057 if (PerLine == PerParentName->second.end()) 3058 return false; 3059 // Fail if this entry is already registered. 3060 if (!IgnoreAddressId && 3061 (PerLine->second.getAddress() || PerLine->second.getID())) 3062 return false; 3063 return true; 3064 } 3065 3066 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3067 const OffloadTargetRegionEntryInfoActTy &Action) { 3068 // Scan all target region entries and perform the provided action. 
3069 for (const auto &D : OffloadEntriesTargetRegion) 3070 for (const auto &F : D.second) 3071 for (const auto &P : F.second) 3072 for (const auto &L : P.second) 3073 Action(D.first, F.first, P.first(), L.first, L.second); 3074 } 3075 3076 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3077 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3078 OMPTargetGlobalVarEntryKind Flags, 3079 unsigned Order) { 3080 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3081 "only required for the device " 3082 "code generation."); 3083 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3084 ++OffloadingEntriesNum; 3085 } 3086 3087 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3088 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3089 CharUnits VarSize, 3090 OMPTargetGlobalVarEntryKind Flags, 3091 llvm::GlobalValue::LinkageTypes Linkage) { 3092 if (CGM.getLangOpts().OpenMPIsDevice) { 3093 // This could happen if the device compilation is invoked standalone. 
3094 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3095 return; 3096 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3097 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3098 if (Entry.getVarSize().isZero()) { 3099 Entry.setVarSize(VarSize); 3100 Entry.setLinkage(Linkage); 3101 } 3102 return; 3103 } 3104 Entry.setVarSize(VarSize); 3105 Entry.setLinkage(Linkage); 3106 Entry.setAddress(Addr); 3107 } else { 3108 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3109 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3110 assert(Entry.isValid() && Entry.getFlags() == Flags && 3111 "Entry not initialized!"); 3112 if (Entry.getVarSize().isZero()) { 3113 Entry.setVarSize(VarSize); 3114 Entry.setLinkage(Linkage); 3115 } 3116 return; 3117 } 3118 OffloadEntriesDeviceGlobalVar.try_emplace( 3119 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3120 ++OffloadingEntriesNum; 3121 } 3122 } 3123 3124 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3125 actOnDeviceGlobalVarEntriesInfo( 3126 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3127 // Scan all target region entries and perform the provided action. 3128 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3129 Action(E.getKey(), E.getValue()); 3130 } 3131 3132 void CGOpenMPRuntime::createOffloadEntry( 3133 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3134 llvm::GlobalValue::LinkageTypes Linkage) { 3135 StringRef Name = Addr->getName(); 3136 llvm::Module &M = CGM.getModule(); 3137 llvm::LLVMContext &C = M.getContext(); 3138 3139 // Create constant string with the name. 
3140 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3141 3142 std::string StringName = getName({"omp_offloading", "entry_name"}); 3143 auto *Str = new llvm::GlobalVariable( 3144 M, StrPtrInit->getType(), /*isConstant=*/true, 3145 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3146 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3147 3148 llvm::Constant *Data[] = { 3149 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3150 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3151 llvm::ConstantInt::get(CGM.SizeTy, Size), 3152 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3153 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3154 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3155 llvm::GlobalVariable *Entry = createGlobalStruct( 3156 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3157 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3158 3159 // The entry has to be created in the section the linker expects it to be. 3160 Entry->setSection("omp_offloading_entries"); 3161 } 3162 3163 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3164 // Emit the offloading entries and metadata so that the device codegen side 3165 // can easily figure out what to emit. The produced metadata looks like 3166 // this: 3167 // 3168 // !omp_offload.info = !{!1, ...} 3169 // 3170 // Right now we only generate metadata for function that contain target 3171 // regions. 3172 3173 // If we are in simd mode or there are no entries, we don't need to do 3174 // anything. 
3175 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3176 return; 3177 3178 llvm::Module &M = CGM.getModule(); 3179 llvm::LLVMContext &C = M.getContext(); 3180 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3181 SourceLocation, StringRef>, 3182 16> 3183 OrderedEntries(OffloadEntriesInfoManager.size()); 3184 llvm::SmallVector<StringRef, 16> ParentFunctions( 3185 OffloadEntriesInfoManager.size()); 3186 3187 // Auxiliary methods to create metadata values and strings. 3188 auto &&GetMDInt = [this](unsigned V) { 3189 return llvm::ConstantAsMetadata::get( 3190 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3191 }; 3192 3193 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3194 3195 // Create the offloading info metadata node. 3196 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3197 3198 // Create function that emits metadata for each target region entry; 3199 auto &&TargetRegionMetadataEmitter = 3200 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3201 &GetMDString]( 3202 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3203 unsigned Line, 3204 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3205 // Generate metadata for target regions. Each entry of this metadata 3206 // contains: 3207 // - Entry 0 -> Kind of this type of metadata (0). 3208 // - Entry 1 -> Device ID of the file where the entry was identified. 3209 // - Entry 2 -> File ID of the file where the entry was identified. 3210 // - Entry 3 -> Mangled name of the function where the entry was 3211 // identified. 3212 // - Entry 4 -> Line in the file where the entry was identified. 3213 // - Entry 5 -> Order the entry was created. 3214 // The first element of the metadata node is the kind. 
3215 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3216 GetMDInt(FileID), GetMDString(ParentName), 3217 GetMDInt(Line), GetMDInt(E.getOrder())}; 3218 3219 SourceLocation Loc; 3220 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3221 E = CGM.getContext().getSourceManager().fileinfo_end(); 3222 I != E; ++I) { 3223 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3224 I->getFirst()->getUniqueID().getFile() == FileID) { 3225 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3226 I->getFirst(), Line, 1); 3227 break; 3228 } 3229 } 3230 // Save this entry in the right position of the ordered entries array. 3231 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3232 ParentFunctions[E.getOrder()] = ParentName; 3233 3234 // Add metadata to the named metadata node. 3235 MD->addOperand(llvm::MDNode::get(C, Ops)); 3236 }; 3237 3238 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3239 TargetRegionMetadataEmitter); 3240 3241 // Create function that emits metadata for each device global variable entry; 3242 auto &&DeviceGlobalVarMetadataEmitter = 3243 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3244 MD](StringRef MangledName, 3245 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3246 &E) { 3247 // Generate metadata for global variables. Each entry of this metadata 3248 // contains: 3249 // - Entry 0 -> Kind of this type of metadata (1). 3250 // - Entry 1 -> Mangled name of the variable. 3251 // - Entry 2 -> Declare target kind. 3252 // - Entry 3 -> Order the entry was created. 3253 // The first element of the metadata node is the kind. 3254 llvm::Metadata *Ops[] = { 3255 GetMDInt(E.getKind()), GetMDString(MangledName), 3256 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3257 3258 // Save this entry in the right position of the ordered entries array. 
3259 OrderedEntries[E.getOrder()] = 3260 std::make_tuple(&E, SourceLocation(), MangledName); 3261 3262 // Add metadata to the named metadata node. 3263 MD->addOperand(llvm::MDNode::get(C, Ops)); 3264 }; 3265 3266 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3267 DeviceGlobalVarMetadataEmitter); 3268 3269 for (const auto &E : OrderedEntries) { 3270 assert(std::get<0>(E) && "All ordered entries must exist!"); 3271 if (const auto *CE = 3272 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3273 std::get<0>(E))) { 3274 if (!CE->getID() || !CE->getAddress()) { 3275 // Do not blame the entry if the parent funtion is not emitted. 3276 StringRef FnName = ParentFunctions[CE->getOrder()]; 3277 if (!CGM.GetGlobalValue(FnName)) 3278 continue; 3279 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3280 DiagnosticsEngine::Error, 3281 "Offloading entry for target region in %0 is incorrect: either the " 3282 "address or the ID is invalid."); 3283 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3284 continue; 3285 } 3286 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3287 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3288 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3289 OffloadEntryInfoDeviceGlobalVar>( 3290 std::get<0>(E))) { 3291 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3292 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3293 CE->getFlags()); 3294 switch (Flags) { 3295 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3296 if (CGM.getLangOpts().OpenMPIsDevice && 3297 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3298 continue; 3299 if (!CE->getAddress()) { 3300 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3301 DiagnosticsEngine::Error, "Offloading entry for declare target " 3302 "variable %0 is incorrect: the " 3303 "address is invalid."); 3304 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3305 
continue; 3306 } 3307 // The vaiable has no definition - no need to add the entry. 3308 if (CE->getVarSize().isZero()) 3309 continue; 3310 break; 3311 } 3312 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3313 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3314 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3315 "Declaret target link address is set."); 3316 if (CGM.getLangOpts().OpenMPIsDevice) 3317 continue; 3318 if (!CE->getAddress()) { 3319 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3320 DiagnosticsEngine::Error, 3321 "Offloading entry for declare target variable is incorrect: the " 3322 "address is invalid."); 3323 CGM.getDiags().Report(DiagID); 3324 continue; 3325 } 3326 break; 3327 } 3328 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3329 CE->getVarSize().getQuantity(), Flags, 3330 CE->getLinkage()); 3331 } else { 3332 llvm_unreachable("Unsupported entry kind."); 3333 } 3334 } 3335 } 3336 3337 /// Loads all the offload entries information from the host IR 3338 /// metadata. 3339 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3340 // If we are in target mode, load the metadata from the host IR. This code has 3341 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3342 3343 if (!CGM.getLangOpts().OpenMPIsDevice) 3344 return; 3345 3346 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3347 return; 3348 3349 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3350 if (auto EC = Buf.getError()) { 3351 CGM.getDiags().Report(diag::err_cannot_open_file) 3352 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3353 return; 3354 } 3355 3356 llvm::LLVMContext C; 3357 auto ME = expectedToErrorOrAndEmitErrors( 3358 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3359 3360 if (auto EC = ME.getError()) { 3361 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3362 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3363 CGM.getDiags().Report(DiagID) 3364 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3365 return; 3366 } 3367 3368 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3369 if (!MD) 3370 return; 3371 3372 for (llvm::MDNode *MN : MD->operands()) { 3373 auto &&GetMDInt = [MN](unsigned Idx) { 3374 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3375 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3376 }; 3377 3378 auto &&GetMDString = [MN](unsigned Idx) { 3379 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3380 return V->getString(); 3381 }; 3382 3383 switch (GetMDInt(0)) { 3384 default: 3385 llvm_unreachable("Unexpected metadata!"); 3386 break; 3387 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3388 OffloadingEntryInfoTargetRegion: 3389 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3390 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3391 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3392 /*Order=*/GetMDInt(5)); 3393 break; 3394 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3395 OffloadingEntryInfoDeviceGlobalVar: 3396 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3397 /*MangledName=*/GetMDString(1), 3398 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3399 /*Flags=*/GetMDInt(2)), 3400 /*Order=*/GetMDInt(3)); 3401 break; 3402 } 3403 } 3404 } 3405 3406 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3407 if (!KmpRoutineEntryPtrTy) { 3408 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3409 ASTContext &C = CGM.getContext(); 3410 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3411 FunctionProtoType::ExtProtoInfo EPI; 3412 KmpRoutineEntryPtrQTy = C.getPointerType( 3413 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3414 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3415 } 3416 } 3417 3418 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3419 // Make sure the type of the entry is already created. This is the type we 3420 // have to create: 3421 // struct __tgt_offload_entry{ 3422 // void *addr; // Pointer to the offload entry info. 3423 // // (function or global) 3424 // char *name; // Name of the function or global. 3425 // size_t size; // Size of the entry info (0 if it a function). 3426 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3427 // int32_t reserved; // Reserved, to use by the runtime library. 
3428 // }; 3429 if (TgtOffloadEntryQTy.isNull()) { 3430 ASTContext &C = CGM.getContext(); 3431 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3432 RD->startDefinition(); 3433 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3434 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3435 addFieldToRecordDecl(C, RD, C.getSizeType()); 3436 addFieldToRecordDecl( 3437 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3438 addFieldToRecordDecl( 3439 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3440 RD->completeDefinition(); 3441 RD->addAttr(PackedAttr::CreateImplicit(C)); 3442 TgtOffloadEntryQTy = C.getRecordType(RD); 3443 } 3444 return TgtOffloadEntryQTy; 3445 } 3446 3447 namespace { 3448 struct PrivateHelpersTy { 3449 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3450 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3451 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3452 PrivateElemInit(PrivateElemInit) {} 3453 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3454 const Expr *OriginalRef = nullptr; 3455 const VarDecl *Original = nullptr; 3456 const VarDecl *PrivateCopy = nullptr; 3457 const VarDecl *PrivateElemInit = nullptr; 3458 bool isLocalPrivate() const { 3459 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3460 } 3461 }; 3462 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3463 } // anonymous namespace 3464 3465 static bool isAllocatableDecl(const VarDecl *VD) { 3466 const VarDecl *CVD = VD->getCanonicalDecl(); 3467 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3468 return false; 3469 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3470 // Use the default allocation. 
3471 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 3472 !AA->getAllocator()); 3473 } 3474 3475 static RecordDecl * 3476 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3477 if (!Privates.empty()) { 3478 ASTContext &C = CGM.getContext(); 3479 // Build struct .kmp_privates_t. { 3480 // /* private vars */ 3481 // }; 3482 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3483 RD->startDefinition(); 3484 for (const auto &Pair : Privates) { 3485 const VarDecl *VD = Pair.second.Original; 3486 QualType Type = VD->getType().getNonReferenceType(); 3487 // If the private variable is a local variable with lvalue ref type, 3488 // allocate the pointer instead of the pointee type. 3489 if (Pair.second.isLocalPrivate()) { 3490 if (VD->getType()->isLValueReferenceType()) 3491 Type = C.getPointerType(Type); 3492 if (isAllocatableDecl(VD)) 3493 Type = C.getPointerType(Type); 3494 } 3495 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3496 if (VD->hasAttrs()) { 3497 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3498 E(VD->getAttrs().end()); 3499 I != E; ++I) 3500 FD->addAttr(*I); 3501 } 3502 } 3503 RD->completeDefinition(); 3504 return RD; 3505 } 3506 return nullptr; 3507 } 3508 3509 static RecordDecl * 3510 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3511 QualType KmpInt32Ty, 3512 QualType KmpRoutineEntryPointerQTy) { 3513 ASTContext &C = CGM.getContext(); 3514 // Build struct kmp_task_t { 3515 // void * shareds; 3516 // kmp_routine_entry_t routine; 3517 // kmp_int32 part_id; 3518 // kmp_cmplrdata_t data1; 3519 // kmp_cmplrdata_t data2; 3520 // For taskloops additional fields: 3521 // kmp_uint64 lb; 3522 // kmp_uint64 ub; 3523 // kmp_int64 st; 3524 // kmp_int32 liter; 3525 // void * reductions; 3526 // }; 3527 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3528 UD->startDefinition(); 3529 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3530 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3531 UD->completeDefinition(); 3532 QualType KmpCmplrdataTy = C.getRecordType(UD); 3533 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3534 RD->startDefinition(); 3535 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3536 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3537 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3538 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3539 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3540 if (isOpenMPTaskLoopDirective(Kind)) { 3541 QualType KmpUInt64Ty = 3542 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3543 QualType KmpInt64Ty = 3544 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3545 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3546 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3547 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3548 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3549 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3550 } 3551 RD->completeDefinition(); 3552 return RD; 3553 } 3554 3555 static RecordDecl * 3556 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3557 ArrayRef<PrivateDataTy> Privates) { 3558 ASTContext &C = CGM.getContext(); 3559 // Build struct kmp_task_t_with_privates { 3560 // kmp_task_t task_data; 3561 // .kmp_privates_t. privates; 3562 // }; 3563 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3564 RD->startDefinition(); 3565 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3566 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3567 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3568 RD->completeDefinition(); 3569 return RD; 3570 } 3571 3572 /// Emit a proxy function which accepts kmp_task_t as the second 3573 /// argument. 
3574 /// \code 3575 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3576 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3577 /// For taskloops: 3578 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3579 /// tt->reductions, tt->shareds); 3580 /// return 0; 3581 /// } 3582 /// \endcode 3583 static llvm::Function * 3584 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3585 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3586 QualType KmpTaskTWithPrivatesPtrQTy, 3587 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3588 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3589 llvm::Value *TaskPrivatesMap) { 3590 ASTContext &C = CGM.getContext(); 3591 FunctionArgList Args; 3592 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3593 ImplicitParamDecl::Other); 3594 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3595 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3596 ImplicitParamDecl::Other); 3597 Args.push_back(&GtidArg); 3598 Args.push_back(&TaskTypeArg); 3599 const auto &TaskEntryFnInfo = 3600 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3601 llvm::FunctionType *TaskEntryTy = 3602 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3603 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3604 auto *TaskEntry = llvm::Function::Create( 3605 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3606 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3607 TaskEntry->setDoesNotRecurse(); 3608 CodeGenFunction CGF(CGM); 3609 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3610 Loc, Loc); 3611 3612 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3613 // tt, 3614 // For taskloops: 3615 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3616 // 
tt->task_data.shareds); 3617 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3618 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3619 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3620 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3621 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3622 const auto *KmpTaskTWithPrivatesQTyRD = 3623 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3624 LValue Base = 3625 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3626 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3627 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3628 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3629 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3630 3631 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3632 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3633 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3634 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3635 CGF.ConvertTypeForMem(SharedsPtrTy)); 3636 3637 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3638 llvm::Value *PrivatesParam; 3639 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3640 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3641 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3642 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3643 } else { 3644 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3645 } 3646 3647 llvm::Value *CommonArgs[] = { 3648 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3649 CGF.Builder 3650 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), 3651 CGF.VoidPtrTy, CGF.Int8Ty) 3652 .getPointer()}; 3653 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3654 std::end(CommonArgs)); 3655 if (isOpenMPTaskLoopDirective(Kind)) { 3656 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3657 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3658 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3659 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3660 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3661 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3662 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3663 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3664 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3665 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3666 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3667 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3668 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3669 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3670 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3671 CallArgs.push_back(LBParam); 3672 CallArgs.push_back(UBParam); 3673 CallArgs.push_back(StParam); 3674 CallArgs.push_back(LIParam); 3675 CallArgs.push_back(RParam); 3676 } 3677 CallArgs.push_back(SharedsParam); 3678 3679 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3680 CallArgs); 3681 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3682 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3683 CGF.FinishFunction(); 3684 return TaskEntry; 3685 } 3686 3687 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3688 SourceLocation Loc, 3689 QualType KmpInt32Ty, 3690 QualType KmpTaskTWithPrivatesPtrQTy, 3691 QualType KmpTaskTWithPrivatesQTy) { 3692 ASTContext &C = CGM.getContext(); 3693 FunctionArgList Args; 3694 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3695 ImplicitParamDecl::Other); 3696 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3697 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3698 
ImplicitParamDecl::Other); 3699 Args.push_back(&GtidArg); 3700 Args.push_back(&TaskTypeArg); 3701 const auto &DestructorFnInfo = 3702 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3703 llvm::FunctionType *DestructorFnTy = 3704 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3705 std::string Name = 3706 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3707 auto *DestructorFn = 3708 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3709 Name, &CGM.getModule()); 3710 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3711 DestructorFnInfo); 3712 DestructorFn->setDoesNotRecurse(); 3713 CodeGenFunction CGF(CGM); 3714 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3715 Args, Loc, Loc); 3716 3717 LValue Base = CGF.EmitLoadOfPointerLValue( 3718 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3719 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3720 const auto *KmpTaskTWithPrivatesQTyRD = 3721 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3722 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3723 Base = CGF.EmitLValueForField(Base, *FI); 3724 for (const auto *Field : 3725 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3726 if (QualType::DestructionKind DtorKind = 3727 Field->getType().isDestructedType()) { 3728 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3729 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3730 } 3731 } 3732 CGF.FinishFunction(); 3733 return DestructorFn; 3734 } 3735 3736 /// Emit a privates mapping function for correct handling of private and 3737 /// firstprivate variables. 3738 /// \code 3739 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3740 /// **noalias priv1,..., <tyn> **noalias privn) { 3741 /// *priv1 = &.privates.priv1; 3742 /// ...; 3743 /// *privn = &.privates.privn; 3744 /// } 3745 /// \endcode 3746 static llvm::Value * 3747 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3748 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3749 ArrayRef<PrivateDataTy> Privates) { 3750 ASTContext &C = CGM.getContext(); 3751 FunctionArgList Args; 3752 ImplicitParamDecl TaskPrivatesArg( 3753 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3754 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3755 ImplicitParamDecl::Other); 3756 Args.push_back(&TaskPrivatesArg); 3757 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3758 unsigned Counter = 1; 3759 for (const Expr *E : Data.PrivateVars) { 3760 Args.push_back(ImplicitParamDecl::Create( 3761 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3762 C.getPointerType(C.getPointerType(E->getType())) 3763 .withConst() 3764 .withRestrict(), 3765 ImplicitParamDecl::Other)); 3766 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3767 PrivateVarsPos[VD] = Counter; 3768 ++Counter; 3769 } 3770 for (const Expr *E : Data.FirstprivateVars) { 3771 Args.push_back(ImplicitParamDecl::Create( 3772 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3773 C.getPointerType(C.getPointerType(E->getType())) 3774 .withConst() 3775 .withRestrict(), 3776 ImplicitParamDecl::Other)); 3777 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3778 PrivateVarsPos[VD] = Counter; 3779 ++Counter; 3780 } 3781 for (const Expr *E : Data.LastprivateVars) { 3782 Args.push_back(ImplicitParamDecl::Create( 3783 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3784 C.getPointerType(C.getPointerType(E->getType())) 3785 .withConst() 3786 .withRestrict(), 3787 ImplicitParamDecl::Other)); 3788 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3789 PrivateVarsPos[VD] = Counter; 3790 ++Counter; 3791 } 3792 for (const VarDecl *VD : 
Data.PrivateLocals) { 3793 QualType Ty = VD->getType().getNonReferenceType(); 3794 if (VD->getType()->isLValueReferenceType()) 3795 Ty = C.getPointerType(Ty); 3796 if (isAllocatableDecl(VD)) 3797 Ty = C.getPointerType(Ty); 3798 Args.push_back(ImplicitParamDecl::Create( 3799 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3800 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3801 ImplicitParamDecl::Other)); 3802 PrivateVarsPos[VD] = Counter; 3803 ++Counter; 3804 } 3805 const auto &TaskPrivatesMapFnInfo = 3806 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3807 llvm::FunctionType *TaskPrivatesMapTy = 3808 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3809 std::string Name = 3810 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3811 auto *TaskPrivatesMap = llvm::Function::Create( 3812 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3813 &CGM.getModule()); 3814 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3815 TaskPrivatesMapFnInfo); 3816 if (CGM.getLangOpts().Optimize) { 3817 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3818 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3819 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3820 } 3821 CodeGenFunction CGF(CGM); 3822 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3823 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3824 3825 // *privi = &.privates.privi; 3826 LValue Base = CGF.EmitLoadOfPointerLValue( 3827 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3828 TaskPrivatesArg.getType()->castAs<PointerType>()); 3829 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3830 Counter = 0; 3831 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3832 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3833 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3834 LValue RefLVal = 3835 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3836 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3837 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3838 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3839 ++Counter; 3840 } 3841 CGF.FinishFunction(); 3842 return TaskPrivatesMap; 3843 } 3844 3845 /// Emit initialization for private variables in task-based directives. 3846 static void emitPrivatesInit(CodeGenFunction &CGF, 3847 const OMPExecutableDirective &D, 3848 Address KmpTaskSharedsPtr, LValue TDBase, 3849 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3850 QualType SharedsTy, QualType SharedsPtrTy, 3851 const OMPTaskDataTy &Data, 3852 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3853 ASTContext &C = CGF.getContext(); 3854 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3855 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3856 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3857 ? OMPD_taskloop 3858 : OMPD_task; 3859 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3860 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3861 LValue SrcBase; 3862 bool IsTargetTask = 3863 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3864 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3865 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3866 // PointersArray, SizesArray, and MappersArray. The original variables for 3867 // these arrays are not captured and we get their addresses explicitly. 
3868 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3869 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3870 SrcBase = CGF.MakeAddrLValue( 3871 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3872 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), 3873 CGF.ConvertTypeForMem(SharedsTy)), 3874 SharedsTy); 3875 } 3876 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3877 for (const PrivateDataTy &Pair : Privates) { 3878 // Do not initialize private locals. 3879 if (Pair.second.isLocalPrivate()) { 3880 ++FI; 3881 continue; 3882 } 3883 const VarDecl *VD = Pair.second.PrivateCopy; 3884 const Expr *Init = VD->getAnyInitializer(); 3885 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3886 !CGF.isTrivialInitializer(Init)))) { 3887 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3888 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3889 const VarDecl *OriginalVD = Pair.second.Original; 3890 // Check if the variable is the target-based BasePointersArray, 3891 // PointersArray, SizesArray, or MappersArray. 
3892 LValue SharedRefLValue; 3893 QualType Type = PrivateLValue.getType(); 3894 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3895 if (IsTargetTask && !SharedField) { 3896 assert(isa<ImplicitParamDecl>(OriginalVD) && 3897 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3898 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3899 ->getNumParams() == 0 && 3900 isa<TranslationUnitDecl>( 3901 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3902 ->getDeclContext()) && 3903 "Expected artificial target data variable."); 3904 SharedRefLValue = 3905 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3906 } else if (ForDup) { 3907 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3908 SharedRefLValue = CGF.MakeAddrLValue( 3909 SharedRefLValue.getAddress(CGF).withAlignment( 3910 C.getDeclAlign(OriginalVD)), 3911 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3912 SharedRefLValue.getTBAAInfo()); 3913 } else if (CGF.LambdaCaptureFields.count( 3914 Pair.second.Original->getCanonicalDecl()) > 0 || 3915 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3916 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3917 } else { 3918 // Processing for implicitly captured variables. 3919 InlinedOpenMPRegionRAII Region( 3920 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3921 /*HasCancel=*/false, /*NoInheritance=*/true); 3922 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3923 } 3924 if (Type->isArrayType()) { 3925 // Initialize firstprivate array. 3926 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3927 // Perform simple memcpy. 3928 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3929 } else { 3930 // Initialize firstprivate array using element-by-element 3931 // initialization. 
3932 CGF.EmitOMPAggregateAssign( 3933 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3934 Type, 3935 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3936 Address SrcElement) { 3937 // Clean up any temporaries needed by the initialization. 3938 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3939 InitScope.addPrivate(Elem, SrcElement); 3940 (void)InitScope.Privatize(); 3941 // Emit initialization for single element. 3942 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3943 CGF, &CapturesInfo); 3944 CGF.EmitAnyExprToMem(Init, DestElement, 3945 Init->getType().getQualifiers(), 3946 /*IsInitializer=*/false); 3947 }); 3948 } 3949 } else { 3950 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3951 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); 3952 (void)InitScope.Privatize(); 3953 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3954 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3955 /*capturedByInit=*/false); 3956 } 3957 } else { 3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3959 } 3960 } 3961 ++FI; 3962 } 3963 } 3964 3965 /// Check if duplication function is required for taskloops. 
3966 static bool checkInitIsRequired(CodeGenFunction &CGF, 3967 ArrayRef<PrivateDataTy> Privates) { 3968 bool InitRequired = false; 3969 for (const PrivateDataTy &Pair : Privates) { 3970 if (Pair.second.isLocalPrivate()) 3971 continue; 3972 const VarDecl *VD = Pair.second.PrivateCopy; 3973 const Expr *Init = VD->getAnyInitializer(); 3974 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && 3975 !CGF.isTrivialInitializer(Init)); 3976 if (InitRequired) 3977 break; 3978 } 3979 return InitRequired; 3980 } 3981 3982 3983 /// Emit task_dup function (for initialization of 3984 /// private/firstprivate/lastprivate vars and last_iter flag) 3985 /// \code 3986 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3987 /// lastpriv) { 3988 /// // setup lastprivate flag 3989 /// task_dst->last = lastpriv; 3990 /// // could be constructor calls here... 3991 /// } 3992 /// \endcode 3993 static llvm::Value * 3994 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3995 const OMPExecutableDirective &D, 3996 QualType KmpTaskTWithPrivatesPtrQTy, 3997 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3998 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3999 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4000 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4001 ASTContext &C = CGM.getContext(); 4002 FunctionArgList Args; 4003 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4004 KmpTaskTWithPrivatesPtrQTy, 4005 ImplicitParamDecl::Other); 4006 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4007 KmpTaskTWithPrivatesPtrQTy, 4008 ImplicitParamDecl::Other); 4009 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4010 ImplicitParamDecl::Other); 4011 Args.push_back(&DstArg); 4012 Args.push_back(&SrcArg); 4013 Args.push_back(&LastprivArg); 4014 const auto &TaskDupFnInfo = 4015 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4016 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4017 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4018 auto *TaskDup = llvm::Function::Create( 4019 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4020 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4021 TaskDup->setDoesNotRecurse(); 4022 CodeGenFunction CGF(CGM); 4023 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4024 Loc); 4025 4026 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4027 CGF.GetAddrOfLocalVar(&DstArg), 4028 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4029 // task_dst->liter = lastpriv; 4030 if (WithLastIter) { 4031 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4032 LValue Base = CGF.EmitLValueForField( 4033 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4034 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4035 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4036 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4037 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4038 } 4039 4040 // Emit initial values for private copies (if any). 
4041 assert(!Privates.empty()); 4042 Address KmpTaskSharedsPtr = Address::invalid(); 4043 if (!Data.FirstprivateVars.empty()) { 4044 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4045 CGF.GetAddrOfLocalVar(&SrcArg), 4046 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4047 LValue Base = CGF.EmitLValueForField( 4048 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4049 KmpTaskSharedsPtr = Address( 4050 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4051 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4052 KmpTaskTShareds)), 4053 Loc), 4054 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); 4055 } 4056 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4057 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4058 CGF.FinishFunction(); 4059 return TaskDup; 4060 } 4061 4062 /// Checks if destructor function is required to be generated. 4063 /// \return true if cleanups are required, false otherwise. 4064 static bool 4065 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4066 ArrayRef<PrivateDataTy> Privates) { 4067 for (const PrivateDataTy &P : Privates) { 4068 if (P.second.isLocalPrivate()) 4069 continue; 4070 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4071 if (Ty.isDestructedType()) 4072 return true; 4073 } 4074 return false; 4075 } 4076 4077 namespace { 4078 /// Loop generator for OpenMP iterator expression. 
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded. When null, neither the constructor
  /// nor the destructor emits any code.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator "iter.cont" jump destinations (loop condition blocks).
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator "iter.exit" jump destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one loop per iterator of \p E, in iterator order. All upper bounds
  /// are evaluated and all iterator/counter temporaries are privatized before
  /// the first loop header is emitted. After construction the insertion point
  /// is inside the innermost "iter.body" block.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first: emits the
  /// counter update, the back-branch to the matching "iter.cont" block and
  /// the "iter.exit" block. Only the outermost exit block (I == 1) is emitted
  /// with IsFinished set.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Computes the address and the size of the object designated by \p E.
///  - Array shaping expression: the address is the value of the base
///    expression and the size is the size of the base's pointee type
///    multiplied by every dimension (each converted to size_t).
///  - Array section (after stripping parens and implicit casts): the address
///    is that of the lvalue of \p E and the size is the distance between the
///    address one element past the section's upper element and that address.
///  - Anything else: the address of the lvalue of \p E and the size of its
///    type as returned by CGF.getTypeSize.
/// \return pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Address of the last element of the section ...
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    // ... advanced by one element to get the one-past-the-end address.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and stores it in \p KmpTaskAffinityInfoTy. The record has three fields, in
/// this order: an intptr_t, a size_t and a 32-bit unsigned integer.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the allocation and initialization of the task descriptor for the
/// directive \p D. In order, the function:
///  - gathers the private, firstprivate, lastprivate and local private
///    variables from \p Data and sorts them by decreasing alignment;
///  - builds (or reuses) the kmp_task_t type and the task-specific record that
///    adds the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc when \p D
///    has a 'nowait' clause;
///  - handles the 'detach' and 'affinity' clauses;
///  - copies the shareds, initializes the privates and, for taskloops that
///    need it, emits the task duplication function;
///  - stores the destructors function and the priority into the task.
/// \return the new task, the task entry, the task pointer cast to the
/// task-with-privates type, the kmp_task_t lvalue, its record declaration and
/// (when emitted) the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the per-element init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the sort is stable, so entries with equal
  // alignment keep their collection order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives and the
  // other directives use separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if there are any privates) and cast
  // it to the type of the fourth parameter of the task function; otherwise
  // pass a null pointer of that type.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' flag is either selected at run time (when a condition value
  // is available) or folded to a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // With 'nowait' the target-task allocation entry point is used; it takes
    // the device id as an additional trailing argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is the run-time part (clauses with an iterator modifier),
    // NumAffinities the compile-time part (clauses without one).
    // NOTE(review): the run-time part multiplies the iterator upper bounds
    // across all clauses and does not factor in C->varlist_size(), while the
    // fill loop below stores one entry per list item per iteration. Confirm
    // the array cannot be undersized when an iterator clause has more than
    // one list item or when several iterator clauses are present.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Run-time element count: emit a variable-length array whose size
      // expression is an opaque value bound to NumOfElements.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time element count: a constant-size temporary is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators. Only the base address and the
    // length fields are stored here; the flags field is not written.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are indexed through a run-time
    // counter that starts right after the statically placed entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope opens the iterator loops here and closes them when it is
      // destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++pos;
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-with-privates record; TDBase is its
  // first field.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A taskloop needs a duplication function when it has lastprivates or
    // when some private copy requires non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind. The 'source',
/// 'sink', 'depobj' and 'unknown' kinds are not expected here and hit
/// llvm_unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = DepInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \p FlagsTy is always set to an unsigned integer type with the bit width of
/// 'bool'. The record has three fields, in this order: an intptr_t (base
/// address), a size_t (length) and the flags.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the kmp_depend_info array referenced by the depobj stored in
/// \p DepobjLVal and the number of its elements. The count is read from the
/// base_addr field of the array element located just before the first
/// dependence record (index -1).
/// \return pair (number of elements, lvalue of the first dependence record).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret the depobj storage as a 'kmp_depend_info *' and load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one record to reach the element holding the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Stores one kmp_depend_info record (base address, length, flags) into
/// \p DependenciesArray for every expression in \p Data.DepExprs, inside the
/// loops generated for \p Data.IteratorExpr (if any).
/// \param Pos Position of the next record: either a pointer to a compile-time
/// index, which is incremented in place, or a pointer to an lvalue holding a
/// run-time index, which is loaded, incremented and stored back in the
/// emitted code.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // No-op scope when there is no iterator expression.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position, at compile time or in the emitted code.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a 'depobj' dependence, emits code that reads the element count of
/// every depobj in \p Data.DepExprs (inside the iterator loops, if any) into
/// a dedicated temporary, and returns the values loaded back from those
/// temporaries after the iterator scope has been closed, one per expression.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // size = 0; size += NumDeps;
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // The iterator scope is closed here; read the accumulated sizes back.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// For a 'depobj' dependence, emits code that copies the dependence records
/// of every depobj in \p Data.DepExprs into \p DependenciesArray, starting at
/// the run-time position held in \p PosLVal, and advances that position by
/// the number of records copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      // Size = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4821 }); 4822 QualType FlagsTy; 4823 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4824 bool HasDepobjDeps = false; 4825 bool HasRegularWithIterators = false; 4826 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4827 llvm::Value *NumOfRegularWithIterators = 4828 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4829 // Calculate number of depobj dependecies and regular deps with the iterators. 4830 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4831 if (D.DepKind == OMPC_DEPEND_depobj) { 4832 SmallVector<llvm::Value *, 4> Sizes = 4833 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4834 for (llvm::Value *Size : Sizes) { 4835 NumOfDepobjElements = 4836 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4837 } 4838 HasDepobjDeps = true; 4839 continue; 4840 } 4841 // Include number of iterations, if any. 4842 4843 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4844 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4845 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4846 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4847 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4848 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4849 NumOfRegularWithIterators = 4850 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4851 } 4852 HasRegularWithIterators = true; 4853 continue; 4854 } 4855 } 4856 4857 QualType KmpDependInfoArrayTy; 4858 if (HasDepobjDeps || HasRegularWithIterators) { 4859 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4860 /*isSigned=*/false); 4861 if (HasDepobjDeps) { 4862 NumOfElements = 4863 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4864 } 4865 if (HasRegularWithIterators) { 4866 NumOfElements = 4867 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4868 } 4869 auto *OVE = new (C) OpaqueValueExpr( 4870 Loc, 
C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4871 VK_PRValue); 4872 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4873 RValue::get(NumOfElements)); 4874 KmpDependInfoArrayTy = 4875 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, 4876 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4877 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4878 // Properly emit variable-sized array. 4879 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4880 ImplicitParamDecl::Other); 4881 CGF.EmitVarDecl(*PD); 4882 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4883 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4884 /*isSigned=*/false); 4885 } else { 4886 KmpDependInfoArrayTy = C.getConstantArrayType( 4887 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4888 ArrayType::Normal, /*IndexTypeQuals=*/0); 4889 DependenciesArray = 4890 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4891 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4892 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4893 /*isSigned=*/false); 4894 } 4895 unsigned Pos = 0; 4896 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4897 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4898 Dependencies[I].IteratorExpr) 4899 continue; 4900 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4901 DependenciesArray); 4902 } 4903 // Copy regular dependecies with iterators. 
4904 LValue PosLVal = CGF.MakeAddrLValue( 4905 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4906 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4907 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4908 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4909 !Dependencies[I].IteratorExpr) 4910 continue; 4911 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4912 DependenciesArray); 4913 } 4914 // Copy final depobj arrays without iterators. 4915 if (HasDepobjDeps) { 4916 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4917 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4918 continue; 4919 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4920 DependenciesArray); 4921 } 4922 } 4923 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4924 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4925 return std::make_pair(NumOfElements, DependenciesArray); 4926 } 4927 4928 Address CGOpenMPRuntime::emitDepobjDependClause( 4929 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4930 SourceLocation Loc) { 4931 if (Dependencies.DepExprs.empty()) 4932 return Address::invalid(); 4933 // Process list of dependencies. 4934 ASTContext &C = CGM.getContext(); 4935 Address DependenciesArray = Address::invalid(); 4936 unsigned NumDependencies = Dependencies.DepExprs.size(); 4937 QualType FlagsTy; 4938 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4939 RecordDecl *KmpDependInfoRD = 4940 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4941 4942 llvm::Value *Size; 4943 // Define type kmp_depend_info[<Dependencies.size()>]; 4944 // For depobj reserve one extra element to store the number of elements. 4945 // It is required to handle depobj(x) update(in) construct. 
4946 // kmp_depend_info[<Dependencies.size()>] deps; 4947 llvm::Value *NumDepsVal; 4948 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4949 if (const auto *IE = 4950 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4951 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4952 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4953 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4954 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4955 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4956 } 4957 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4958 NumDepsVal); 4959 CharUnits SizeInBytes = 4960 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4961 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4962 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4963 NumDepsVal = 4964 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4965 } else { 4966 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4967 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4968 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4969 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4970 Size = CGM.getSize(Sz.alignTo(Align)); 4971 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4972 } 4973 // Need to allocate on the dynamic memory. 4974 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4975 // Use default allocator. 
4976 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4977 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4978 4979 llvm::Value *Addr = 4980 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4981 CGM.getModule(), OMPRTL___kmpc_alloc), 4982 Args, ".dep.arr.addr"); 4983 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4984 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4985 Addr, KmpDependInfoLlvmTy->getPointerTo()); 4986 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4987 // Write number of elements in the first element of array for depobj. 4988 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4989 // deps[i].base_addr = NumDependencies; 4990 LValue BaseAddrLVal = CGF.EmitLValueForField( 4991 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4992 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4993 llvm::PointerUnion<unsigned *, LValue *> Pos; 4994 unsigned Idx = 1; 4995 LValue PosLVal; 4996 if (Dependencies.IteratorExpr) { 4997 PosLVal = CGF.MakeAddrLValue( 4998 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4999 C.getSizeType()); 5000 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5001 /*IsInit=*/true); 5002 Pos = &PosLVal; 5003 } else { 5004 Pos = &Idx; 5005 } 5006 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5007 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5008 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, 5009 CGF.Int8Ty); 5010 return DependenciesArray; 5011 } 5012 5013 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5014 SourceLocation Loc) { 5015 ASTContext &C = CGM.getContext(); 5016 QualType FlagsTy; 5017 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5018 LValue Base = CGF.EmitLoadOfPointerLValue( 5019 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>()); 5020 QualType KmpDependInfoPtrTy 
= C.getPointerType(KmpDependInfoTy); 5021 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5022 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), 5023 CGF.ConvertTypeForMem(KmpDependInfoTy)); 5024 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5025 Addr.getElementType(), Addr.getPointer(), 5026 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5027 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5028 CGF.VoidPtrTy); 5029 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5030 // Use default allocator. 5031 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5032 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5033 5034 // _kmpc_free(gtid, addr, nullptr); 5035 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5036 CGM.getModule(), OMPRTL___kmpc_free), 5037 Args); 5038 } 5039 5040 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5041 OpenMPDependClauseKind NewDepKind, 5042 SourceLocation Loc) { 5043 ASTContext &C = CGM.getContext(); 5044 QualType FlagsTy; 5045 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5046 RecordDecl *KmpDependInfoRD = 5047 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5048 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5049 llvm::Value *NumDeps; 5050 LValue Base; 5051 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5052 5053 Address Begin = Base.getAddress(CGF); 5054 // Cast from pointer to array type to pointer to single element. 5055 llvm::Value *End = CGF.Builder.CreateGEP( 5056 Begin.getElementType(), Begin.getPointer(), NumDeps); 5057 // The basic structure here is a while-do loop. 
5058 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5059 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5060 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5061 CGF.EmitBlock(BodyBB); 5062 llvm::PHINode *ElementPHI = 5063 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5064 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5065 Begin = Begin.withPointer(ElementPHI); 5066 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5067 Base.getTBAAInfo()); 5068 // deps[i].flags = NewDepKind; 5069 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5070 LValue FlagsLVal = CGF.EmitLValueForField( 5071 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5072 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5073 FlagsLVal); 5074 5075 // Shift the address forward by one element. 5076 Address ElementNext = 5077 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5078 ElementPHI->addIncoming(ElementNext.getPointer(), 5079 CGF.Builder.GetInsertBlock()); 5080 llvm::Value *IsEmpty = 5081 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5082 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5083 // Done. 
5084 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5085 } 5086 5087 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5088 const OMPExecutableDirective &D, 5089 llvm::Function *TaskFunction, 5090 QualType SharedsTy, Address Shareds, 5091 const Expr *IfCond, 5092 const OMPTaskDataTy &Data) { 5093 if (!CGF.HaveInsertPoint()) 5094 return; 5095 5096 TaskResultTy Result = 5097 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5098 llvm::Value *NewTask = Result.NewTask; 5099 llvm::Function *TaskEntry = Result.TaskEntry; 5100 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5101 LValue TDBase = Result.TDBase; 5102 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5103 // Process list of dependences. 5104 Address DependenciesArray = Address::invalid(); 5105 llvm::Value *NumOfElements; 5106 std::tie(NumOfElements, DependenciesArray) = 5107 emitDependClause(CGF, Data.Dependences, Loc); 5108 5109 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5110 // libcall. 
5111 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5112 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5113 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5114 // list is not empty 5115 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5116 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5117 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5118 llvm::Value *DepTaskArgs[7]; 5119 if (!Data.Dependences.empty()) { 5120 DepTaskArgs[0] = UpLoc; 5121 DepTaskArgs[1] = ThreadID; 5122 DepTaskArgs[2] = NewTask; 5123 DepTaskArgs[3] = NumOfElements; 5124 DepTaskArgs[4] = DependenciesArray.getPointer(); 5125 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5126 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5127 } 5128 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5129 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5130 if (!Data.Tied) { 5131 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5132 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5133 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5134 } 5135 if (!Data.Dependences.empty()) { 5136 CGF.EmitRuntimeCall( 5137 OMPBuilder.getOrCreateRuntimeFunction( 5138 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5139 DepTaskArgs); 5140 } else { 5141 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5142 CGM.getModule(), OMPRTL___kmpc_omp_task), 5143 TaskArgs); 5144 } 5145 // Check if parent region is untied and build return for untied task; 5146 if (auto *Region = 5147 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5148 Region->emitUntiedSwitch(CGF); 5149 }; 5150 5151 llvm::Value *DepWaitTaskArgs[6]; 5152 if (!Data.Dependences.empty()) { 5153 DepWaitTaskArgs[0] = UpLoc; 5154 DepWaitTaskArgs[1] = ThreadID; 5155 DepWaitTaskArgs[2] = NumOfElements; 5156 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5157 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5158 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5159 } 5160 auto &M = CGM.getModule(); 5161 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5162 TaskEntry, &Data, &DepWaitTaskArgs, 5163 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5164 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5165 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5166 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5167 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5168 // is specified. 5169 if (!Data.Dependences.empty()) 5170 CGF.EmitRuntimeCall( 5171 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5172 DepWaitTaskArgs); 5173 // Call proxy_task_entry(gtid, new_task); 5174 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5175 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5176 Action.Enter(CGF); 5177 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5178 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5179 OutlinedFnArgs); 5180 }; 5181 5182 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5183 // kmp_task_t *new_task); 5184 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5185 // kmp_task_t *new_task); 5186 RegionCodeGenTy RCG(CodeGen); 5187 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5188 M, OMPRTL___kmpc_omp_task_begin_if0), 5189 TaskArgs, 5190 OMPBuilder.getOrCreateRuntimeFunction( 5191 M, OMPRTL___kmpc_omp_task_complete_if0), 5192 TaskArgs); 5193 RCG.setAction(Action); 5194 RCG(CGF); 5195 }; 5196 5197 if (IfCond) { 5198 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5199 } else { 5200 RegionCodeGenTy ThenRCG(ThenCodeGen); 5201 ThenRCG(CGF); 5202 } 5203 } 5204 5205 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5206 const OMPLoopDirective &D, 5207 llvm::Function *TaskFunction, 5208 
QualType SharedsTy, Address Shareds, 5209 const Expr *IfCond, 5210 const OMPTaskDataTy &Data) { 5211 if (!CGF.HaveInsertPoint()) 5212 return; 5213 TaskResultTy Result = 5214 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5215 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5216 // libcall. 5217 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5218 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5219 // sched, kmp_uint64 grainsize, void *task_dup); 5220 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5221 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5222 llvm::Value *IfVal; 5223 if (IfCond) { 5224 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5225 /*isSigned=*/true); 5226 } else { 5227 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5228 } 5229 5230 LValue LBLVal = CGF.EmitLValueForField( 5231 Result.TDBase, 5232 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5233 const auto *LBVar = 5234 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5235 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5236 LBLVal.getQuals(), 5237 /*IsInitializer=*/true); 5238 LValue UBLVal = CGF.EmitLValueForField( 5239 Result.TDBase, 5240 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5241 const auto *UBVar = 5242 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5243 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5244 UBLVal.getQuals(), 5245 /*IsInitializer=*/true); 5246 LValue StLVal = CGF.EmitLValueForField( 5247 Result.TDBase, 5248 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5249 const auto *StVar = 5250 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5251 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5252 StLVal.getQuals(), 5253 /*IsInitializer=*/true); 5254 // 
Store reductions address. 5255 LValue RedLVal = CGF.EmitLValueForField( 5256 Result.TDBase, 5257 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5258 if (Data.Reductions) { 5259 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5260 } else { 5261 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5262 CGF.getContext().VoidPtrTy); 5263 } 5264 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5265 llvm::Value *TaskArgs[] = { 5266 UpLoc, 5267 ThreadID, 5268 Result.NewTask, 5269 IfVal, 5270 LBLVal.getPointer(CGF), 5271 UBLVal.getPointer(CGF), 5272 CGF.EmitLoadOfScalar(StLVal, Loc), 5273 llvm::ConstantInt::getSigned( 5274 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5275 llvm::ConstantInt::getSigned( 5276 CGF.IntTy, Data.Schedule.getPointer() 5277 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5278 : NoSchedule), 5279 Data.Schedule.getPointer() 5280 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5281 /*isSigned=*/false) 5282 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5283 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5284 Result.TaskDupFn, CGF.VoidPtrTy) 5285 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5286 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5287 CGM.getModule(), OMPRTL___kmpc_taskloop), 5288 TaskArgs); 5289 } 5290 5291 /// Emit reduction operation for each element of array (required for 5292 /// array sections) LHS op = RHS. 5293 /// \param Type Type of array. 5294 /// \param LHSVar Variable on the left side of the reduction operation 5295 /// (references element of array in original variable). 5296 /// \param RHSVar Variable on the right side of the reduction operation 5297 /// (references element of array in original variable). 5298 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5299 /// RHSVar. 
5300 static void EmitOMPAggregateReduction( 5301 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5302 const VarDecl *RHSVar, 5303 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5304 const Expr *, const Expr *)> &RedOpGen, 5305 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5306 const Expr *UpExpr = nullptr) { 5307 // Perform element-by-element initialization. 5308 QualType ElementTy; 5309 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5310 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5311 5312 // Drill down to the base element type on both arrays. 5313 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5314 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5315 5316 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5317 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5318 // Cast from pointer to array type to pointer to single element. 5319 llvm::Value *LHSEnd = 5320 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 5321 // The basic structure here is a while-do loop. 5322 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5323 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5324 llvm::Value *IsEmpty = 5325 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5326 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5327 5328 // Enter the loop body, making that address the current address. 
5329 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5330 CGF.EmitBlock(BodyBB); 5331 5332 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5333 5334 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5335 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5336 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5337 Address RHSElementCurrent( 5338 RHSElementPHI, RHSAddr.getElementType(), 5339 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5340 5341 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5342 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5343 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5344 Address LHSElementCurrent( 5345 LHSElementPHI, LHSAddr.getElementType(), 5346 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5347 5348 // Emit copy. 5349 CodeGenFunction::OMPPrivateScope Scope(CGF); 5350 Scope.addPrivate(LHSVar, LHSElementCurrent); 5351 Scope.addPrivate(RHSVar, RHSElementCurrent); 5352 Scope.Privatize(); 5353 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5354 Scope.ForceCleanup(); 5355 5356 // Shift the address forward by one element. 5357 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5358 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, 5359 "omp.arraycpy.dest.element"); 5360 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5361 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, 5362 "omp.arraycpy.src.element"); 5363 // Check whether we've reached the end. 5364 llvm::Value *Done = 5365 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5366 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5367 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5368 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5369 5370 // Done. 5371 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5372 } 5373 5374 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5375 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5376 /// UDR combiner function. 5377 static void emitReductionCombiner(CodeGenFunction &CGF, 5378 const Expr *ReductionOp) { 5379 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5380 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5381 if (const auto *DRE = 5382 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5383 if (const auto *DRD = 5384 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5385 std::pair<llvm::Function *, llvm::Function *> Reduction = 5386 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5387 RValue Func = RValue::get(Reduction.first); 5388 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5389 CGF.EmitIgnoredExpr(ReductionOp); 5390 return; 5391 } 5392 CGF.EmitIgnoredExpr(ReductionOp); 5393 } 5394 5395 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5396 SourceLocation Loc, llvm::Type *ArgsElemType, 5397 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 5398 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 5399 ASTContext &C = CGM.getContext(); 5400 5401 // void reduction_func(void *LHSArg, void *RHSArg); 5402 FunctionArgList Args; 5403 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5404 ImplicitParamDecl::Other); 5405 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5406 ImplicitParamDecl::Other); 5407 Args.push_back(&LHSArg); 5408 Args.push_back(&RHSArg); 5409 const auto &CGFI = 5410 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5411 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5412 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5413 llvm::GlobalValue::InternalLinkage, Name, 5414 &CGM.getModule()); 5415 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5416 
Fn->setDoesNotRecurse(); 5417 CodeGenFunction CGF(CGM); 5418 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5419 5420 // Dst = (void*[n])(LHSArg); 5421 // Src = (void*[n])(RHSArg); 5422 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5423 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5424 ArgsElemType->getPointerTo()), 5425 ArgsElemType, CGF.getPointerAlign()); 5426 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5427 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5428 ArgsElemType->getPointerTo()), 5429 ArgsElemType, CGF.getPointerAlign()); 5430 5431 // ... 5432 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5433 // ... 5434 CodeGenFunction::OMPPrivateScope Scope(CGF); 5435 const auto *IPriv = Privates.begin(); 5436 unsigned Idx = 0; 5437 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5438 const auto *RHSVar = 5439 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5440 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 5441 const auto *LHSVar = 5442 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5443 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 5444 QualType PrivTy = (*IPriv)->getType(); 5445 if (PrivTy->isVariablyModifiedType()) { 5446 // Get array size and emit VLA type. 
++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
///
/// If the type of \p PrivateRef is an array type, the combiner is emitted
/// through EmitOMPAggregateReduction over the variables referenced by \p LHS
/// and \p RHS; otherwise it is emitted once via emitReductionCombiner.
///
/// \param CGF Function being emitted into.
/// \param ReductionOp Combiner expression for this item.
/// \param PrivateRef Expression whose type selects the array vs. non-array
/// path.
/// \param LHS Reference to the left-hand side variable of the combiner.
/// \param RHS Reference to the right-hand side variable of the combiner.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the reduction items of a construct.
///
/// Nothing is emitted when \p CGF has no insertion point. When
/// Options.SimpleReduction is set, only the combiners are emitted inline (no
/// runtime calls). Otherwise a reduction list is built, a reduce function is
/// emitted, and the result of __kmpc_reduce{_nowait} is dispatched through a
/// switch with a "case 1" (plain combiners) and a "case 2" (atomic or
/// critical-region combiners) branch.
///
/// \param CGF Function being emitted into.
/// \param Loc Source location used for the runtime calls.
/// \param Privates Private copies, one per reduction item. Their types decide
/// between the array and non-array paths and whether an extra slot holding the
/// VLA size is reserved in the reduction list.
/// \param LHSExprs Left-hand side expressions of the combiners; each one is
/// cast to DeclRefExpr.
/// \param RHSExprs Right-hand side expressions of the combiners; each one is
/// cast to DeclRefExpr and its address is stored in the reduction list.
/// \param ReductionOps Combiner expressions, iterated in lockstep with the
/// three arrays above.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction selects the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline-only form: emit every combiner in place and stop; no reduction
    // list, reduce function or runtime call is produced.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot in the reduction list; it runs ahead of I whenever a
  // variably modified item takes a second slot for its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // The element count is stored in the pointer-sized slot via inttoptr.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note: <n> is the number of reduction items (RHSExprs.size()), while the
  // size argument covers the whole list including the extra VLA size slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is given only the end-of-reduction runtime function and its
  // arguments; the enter callee is null.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are set only when the combiner is a plain assignment
      // 'x = <update>'; otherwise the critical-region fallback below is used.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer named BO declared in this condition shadows the
      // BinaryOperatorKind BO above for the extent of the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Store the supplied value of 'x' into a temporary, remap the
                // LHS variable to that temporary, and evaluate the update
                // expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // (only in the non-nowait form; the nowait form runs the atomic code
    // without an attached action).
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5811 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5812 const Expr *Ref) { 5813 SmallString<256> Buffer; 5814 llvm::raw_svector_ostream Out(Buffer); 5815 const clang::DeclRefExpr *DE; 5816 const VarDecl *D = ::getBaseDecl(Ref, DE); 5817 if (!D) 5818 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5819 D = D->getCanonicalDecl(); 5820 std::string Name = CGM.getOpenMPRuntime().getName( 5821 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5822 Out << Prefix << Name << "_" 5823 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5824 return std::string(Out.str()); 5825 } 5826 5827 /// Emits reduction initializer function: 5828 /// \code 5829 /// void @.red_init(void* %arg, void* %orig) { 5830 /// %0 = bitcast void* %arg to <type>* 5831 /// store <type> <init>, <type>* %0 5832 /// ret void 5833 /// } 5834 /// \endcode 5835 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5836 SourceLocation Loc, 5837 ReductionCodeGen &RCG, unsigned N) { 5838 ASTContext &C = CGM.getContext(); 5839 QualType VoidPtrTy = C.VoidPtrTy; 5840 VoidPtrTy.addRestrict(); 5841 FunctionArgList Args; 5842 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5843 ImplicitParamDecl::Other); 5844 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5845 ImplicitParamDecl::Other); 5846 Args.emplace_back(&Param); 5847 Args.emplace_back(&ParamOrig); 5848 const auto &FnInfo = 5849 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5850 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5851 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5852 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5853 Name, &CGM.getModule()); 5854 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5855 Fn->setDoesNotRecurse(); 5856 CodeGenFunction CGF(CGM); 5857 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5858 QualType PrivateType = RCG.getPrivateType(N); 5859 Address PrivateAddr = CGF.EmitLoadOfPointer( 5860 CGF.Builder.CreateElementBitCast( 5861 CGF.GetAddrOfLocalVar(&Param), 5862 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), 5863 C.getPointerType(PrivateType)->castAs<PointerType>()); 5864 llvm::Value *Size = nullptr; 5865 // If the size of the reduction item is non-constant, load it from global 5866 // threadprivate variable. 5867 if (RCG.getSizes(N).second) { 5868 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5869 CGF, CGM.getContext().getSizeType(), 5870 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5871 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5872 CGM.getContext().getSizeType(), Loc); 5873 } 5874 RCG.emitAggregateType(CGF, N, Size); 5875 Address OrigAddr = Address::invalid(); 5876 // If initializer uses initializer from declare reduction construct, emit a 5877 // pointer to the address of the original reduction item (reuired by reduction 5878 // initializer) 5879 if (RCG.usesReductionInitializer(N)) { 5880 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5881 OrigAddr = CGF.EmitLoadOfPointer( 5882 SharedAddr, 5883 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5884 } 5885 // Emit the initializer: 5886 // %0 = bitcast void* %arg to <type>* 5887 // store <type> <init>, <type>* %0 5888 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, 5889 [](CodeGenFunction &) { return false; }); 5890 CGF.FinishFunction(); 5891 return Fn; 5892 } 5893 5894 /// Emits reduction combiner function: 5895 /// \code 5896 /// void @.red_comb(void* %arg0, void* %arg1) { 5897 /// %lhs = bitcast void* %arg0 to <type>* 5898 /// %rhs = bitcast void* %arg1 to <type>* 5899 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5900 /// store <type> %2, <type>* %lhs 5901 /// ret void 5902 /// } 5903 /// \endcode 5904 static 
llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5905 SourceLocation Loc, 5906 ReductionCodeGen &RCG, unsigned N, 5907 const Expr *ReductionOp, 5908 const Expr *LHS, const Expr *RHS, 5909 const Expr *PrivateRef) { 5910 ASTContext &C = CGM.getContext(); 5911 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5912 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5913 FunctionArgList Args; 5914 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5915 C.VoidPtrTy, ImplicitParamDecl::Other); 5916 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5917 ImplicitParamDecl::Other); 5918 Args.emplace_back(&ParamInOut); 5919 Args.emplace_back(&ParamIn); 5920 const auto &FnInfo = 5921 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5922 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5923 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5924 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5925 Name, &CGM.getModule()); 5926 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5927 Fn->setDoesNotRecurse(); 5928 CodeGenFunction CGF(CGM); 5929 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5930 llvm::Value *Size = nullptr; 5931 // If the size of the reduction item is non-constant, load it from global 5932 // threadprivate variable. 5933 if (RCG.getSizes(N).second) { 5934 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5935 CGF, CGM.getContext().getSizeType(), 5936 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5937 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5938 CGM.getContext().getSizeType(), Loc); 5939 } 5940 RCG.emitAggregateType(CGF, N, Size); 5941 // Remap lhs and rhs variables to the addresses of the function arguments. 
5942 // %lhs = bitcast void* %arg0 to <type>* 5943 // %rhs = bitcast void* %arg1 to <type>* 5944 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5945 PrivateScope.addPrivate( 5946 LHSVD, 5947 // Pull out the pointer to the variable. 5948 CGF.EmitLoadOfPointer( 5949 CGF.Builder.CreateElementBitCast( 5950 CGF.GetAddrOfLocalVar(&ParamInOut), 5951 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), 5952 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5953 PrivateScope.addPrivate( 5954 RHSVD, 5955 // Pull out the pointer to the variable. 5956 CGF.EmitLoadOfPointer( 5957 CGF.Builder.CreateElementBitCast( 5958 CGF.GetAddrOfLocalVar(&ParamIn), 5959 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), 5960 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5961 PrivateScope.Privatize(); 5962 // Emit the combiner body: 5963 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5964 // store <type> %2, <type>* %lhs 5965 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5966 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5967 cast<DeclRefExpr>(RHS)); 5968 CGF.FinishFunction(); 5969 return Fn; 5970 } 5971 5972 /// Emits reduction finalizer function: 5973 /// \code 5974 /// void @.red_fini(void* %arg) { 5975 /// %0 = bitcast void* %arg to <type>* 5976 /// <destroy>(<type>* %0) 5977 /// ret void 5978 /// } 5979 /// \endcode 5980 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5981 SourceLocation Loc, 5982 ReductionCodeGen &RCG, unsigned N) { 5983 if (!RCG.needCleanups(N)) 5984 return nullptr; 5985 ASTContext &C = CGM.getContext(); 5986 FunctionArgList Args; 5987 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5988 ImplicitParamDecl::Other); 5989 Args.emplace_back(&Param); 5990 const auto &FnInfo = 5991 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5992 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5993 std::string Name = 
CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5994 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5995 Name, &CGM.getModule()); 5996 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5997 Fn->setDoesNotRecurse(); 5998 CodeGenFunction CGF(CGM); 5999 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6000 Address PrivateAddr = CGF.EmitLoadOfPointer( 6001 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 6002 llvm::Value *Size = nullptr; 6003 // If the size of the reduction item is non-constant, load it from global 6004 // threadprivate variable. 6005 if (RCG.getSizes(N).second) { 6006 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6007 CGF, CGM.getContext().getSizeType(), 6008 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6009 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6010 CGM.getContext().getSizeType(), Loc); 6011 } 6012 RCG.emitAggregateType(CGF, N, Size); 6013 // Emit the finalizer body: 6014 // <destroy>(<type>* %0) 6015 RCG.emitCleanups(CGF, N, PrivateAddr); 6016 CGF.FinishFunction(Loc); 6017 return Fn; 6018 } 6019 6020 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6021 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6022 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6023 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6024 return nullptr; 6025 6026 // Build typedef struct: 6027 // kmp_taskred_input { 6028 // void *reduce_shar; // shared reduction item 6029 // void *reduce_orig; // original reduction item used for initialization 6030 // size_t reduce_size; // size of data item 6031 // void *reduce_init; // data initialization routine 6032 // void *reduce_fini; // data finalization routine 6033 // void *reduce_comb; // data combiner routine 6034 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6035 // } kmp_taskred_input_t; 6036 ASTContext 
&C = CGM.getContext(); 6037 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6038 RD->startDefinition(); 6039 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6040 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6041 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6042 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6043 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6044 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6045 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6046 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6047 RD->completeDefinition(); 6048 QualType RDType = C.getRecordType(RD); 6049 unsigned Size = Data.ReductionVars.size(); 6050 llvm::APInt ArraySize(/*numBits=*/64, Size); 6051 QualType ArrayRDType = C.getConstantArrayType( 6052 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6053 // kmp_task_red_input_t .rd_input.[Size]; 6054 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6055 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6056 Data.ReductionCopies, Data.ReductionOps); 6057 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6058 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6059 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6060 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6061 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6062 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6063 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6064 ".rd_input.gep."); 6065 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6066 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6067 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6068 RCG.emitSharedOrigLValue(CGF, Cnt); 6069 llvm::Value *CastedShared = 6070 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6071 
CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6072 // ElemLVal.reduce_orig = &Origs[Cnt]; 6073 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6074 llvm::Value *CastedOrig = 6075 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6076 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6077 RCG.emitAggregateType(CGF, Cnt); 6078 llvm::Value *SizeValInChars; 6079 llvm::Value *SizeVal; 6080 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6081 // We use delayed creation/initialization for VLAs and array sections. It is 6082 // required because runtime does not provide the way to pass the sizes of 6083 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6084 // threadprivate global variables are used to store these values and use 6085 // them in the functions. 6086 bool DelayedCreation = !!SizeVal; 6087 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6088 /*isSigned=*/false); 6089 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6090 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6091 // ElemLVal.reduce_init = init; 6092 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6093 llvm::Value *InitAddr = 6094 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6095 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6096 // ElemLVal.reduce_fini = fini; 6097 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6098 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6099 llvm::Value *FiniAddr = Fini 6100 ? 
CGF.EmitCastToVoidPtr(Fini) 6101 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6102 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6103 // ElemLVal.reduce_comb = comb; 6104 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6105 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6106 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6107 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6108 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6109 // ElemLVal.flags = 0; 6110 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6111 if (DelayedCreation) { 6112 CGF.EmitStoreOfScalar( 6113 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6114 FlagsLVal); 6115 } else 6116 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6117 FlagsLVal.getType()); 6118 } 6119 if (Data.IsReductionWithTaskMod) { 6120 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6121 // is_ws, int num, void *data); 6122 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6123 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6124 CGM.IntTy, /*isSigned=*/true); 6125 llvm::Value *Args[] = { 6126 IdentTLoc, GTid, 6127 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6128 /*isSigned=*/true), 6129 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6130 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6131 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6132 return CGF.EmitRuntimeCall( 6133 OMPBuilder.getOrCreateRuntimeFunction( 6134 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6135 Args); 6136 } 6137 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6138 llvm::Value *Args[] = { 6139 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6140 /*isSigned=*/true), 6141 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6142 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6143 CGM.VoidPtrTy)}; 6144 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6145 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6146 Args); 6147 } 6148 6149 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6150 SourceLocation Loc, 6151 bool IsWorksharingReduction) { 6152 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6153 // is_ws, int num, void *data); 6154 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6155 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6156 CGM.IntTy, /*isSigned=*/true); 6157 llvm::Value *Args[] = {IdentTLoc, GTid, 6158 llvm::ConstantInt::get(CGM.IntTy, 6159 IsWorksharingReduction ? 1 : 0, 6160 /*isSigned=*/true)}; 6161 (void)CGF.EmitRuntimeCall( 6162 OMPBuilder.getOrCreateRuntimeFunction( 6163 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6164 Args); 6165 } 6166 6167 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6168 SourceLocation Loc, 6169 ReductionCodeGen &RCG, 6170 unsigned N) { 6171 auto Sizes = RCG.getSizes(N); 6172 // Emit threadprivate global variable if the type is non-constant 6173 // (Sizes.second = nullptr). 
6174 if (Sizes.second) { 6175 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6176 /*isSigned=*/false); 6177 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6178 CGF, CGM.getContext().getSizeType(), 6179 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6180 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6181 } 6182 } 6183 6184 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6185 SourceLocation Loc, 6186 llvm::Value *ReductionsPtr, 6187 LValue SharedLVal) { 6188 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6189 // *d); 6190 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6191 CGM.IntTy, 6192 /*isSigned=*/true), 6193 ReductionsPtr, 6194 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6195 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6196 return Address( 6197 CGF.EmitRuntimeCall( 6198 OMPBuilder.getOrCreateRuntimeFunction( 6199 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6200 Args), 6201 CGF.Int8Ty, SharedLVal.getAlignment()); 6202 } 6203 6204 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6205 const OMPTaskDataTy &Data) { 6206 if (!CGF.HaveInsertPoint()) 6207 return; 6208 6209 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6210 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
6211 OMPBuilder.createTaskwait(CGF.Builder); 6212 } else { 6213 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6214 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6215 auto &M = CGM.getModule(); 6216 Address DependenciesArray = Address::invalid(); 6217 llvm::Value *NumOfElements; 6218 std::tie(NumOfElements, DependenciesArray) = 6219 emitDependClause(CGF, Data.Dependences, Loc); 6220 llvm::Value *DepWaitTaskArgs[6]; 6221 if (!Data.Dependences.empty()) { 6222 DepWaitTaskArgs[0] = UpLoc; 6223 DepWaitTaskArgs[1] = ThreadID; 6224 DepWaitTaskArgs[2] = NumOfElements; 6225 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6226 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6227 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6228 6229 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6230 6231 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6232 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6233 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6234 // is specified. 6235 CGF.EmitRuntimeCall( 6236 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6237 DepWaitTaskArgs); 6238 6239 } else { 6240 6241 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6242 // global_tid); 6243 llvm::Value *Args[] = {UpLoc, ThreadID}; 6244 // Ignore return result until untied tasks are supported. 
6245 CGF.EmitRuntimeCall( 6246 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6247 Args); 6248 } 6249 } 6250 6251 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6252 Region->emitUntiedSwitch(CGF); 6253 } 6254 6255 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6256 OpenMPDirectiveKind InnerKind, 6257 const RegionCodeGenTy &CodeGen, 6258 bool HasCancel) { 6259 if (!CGF.HaveInsertPoint()) 6260 return; 6261 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6262 InnerKind != OMPD_critical && 6263 InnerKind != OMPD_master && 6264 InnerKind != OMPD_masked); 6265 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6266 } 6267 6268 namespace { 6269 enum RTCancelKind { 6270 CancelNoreq = 0, 6271 CancelParallel = 1, 6272 CancelLoop = 2, 6273 CancelSections = 3, 6274 CancelTaskgroup = 4 6275 }; 6276 } // anonymous namespace 6277 6278 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6279 RTCancelKind CancelKind = CancelNoreq; 6280 if (CancelRegion == OMPD_parallel) 6281 CancelKind = CancelParallel; 6282 else if (CancelRegion == OMPD_for) 6283 CancelKind = CancelLoop; 6284 else if (CancelRegion == OMPD_sections) 6285 CancelKind = CancelSections; 6286 else { 6287 assert(CancelRegion == OMPD_taskgroup); 6288 CancelKind = CancelTaskgroup; 6289 } 6290 return CancelKind; 6291 } 6292 6293 void CGOpenMPRuntime::emitCancellationPointCall( 6294 CodeGenFunction &CGF, SourceLocation Loc, 6295 OpenMPDirectiveKind CancelRegion) { 6296 if (!CGF.HaveInsertPoint()) 6297 return; 6298 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6299 // global_tid, kmp_int32 cncl_kind); 6300 if (auto *OMPRegionInfo = 6301 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6302 // For 'cancellation point taskgroup', the task region info may not have a 6303 // cancel. This may instead happen in another adjacent task. 
if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null result branches to the
      // '.cancel.exit' block.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      // Normal execution resumes in the continue block.
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel for a region of kind \p CancelRegion and,
/// when the call returns a non-null value, a branch (through cleanups) to the
/// cancellation destination of the enclosing OpenMP region. For 'parallel'
/// cancellation a barrier call is emitted before that branch.
///
/// When \p IfCond is given the sequence is emitted through emitIfClause with
/// an empty else-generator. Nothing is emitted when \p CGF has no insert
/// point or when CGF.CapturedStmtInfo is not a CGOpenMPRegionInfo.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the cancel sequence; it is run either directly or as the
    // 'then' arm of the if clause (see below).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null result branches to the
      // '.cancel.exit' block.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Conditional cancel: the else arm emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6381 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6382 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6383 6384 public: 6385 OMPUsesAllocatorsActionTy( 6386 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6387 : Allocators(Allocators) {} 6388 void Enter(CodeGenFunction &CGF) override { 6389 if (!CGF.HaveInsertPoint()) 6390 return; 6391 for (const auto &AllocatorData : Allocators) { 6392 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6393 CGF, AllocatorData.first, AllocatorData.second); 6394 } 6395 } 6396 void Exit(CodeGenFunction &CGF) override { 6397 if (!CGF.HaveInsertPoint()) 6398 return; 6399 for (const auto &AllocatorData : Allocators) { 6400 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6401 AllocatorData.first); 6402 } 6403 } 6404 }; 6405 } // namespace 6406 6407 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6408 const OMPExecutableDirective &D, StringRef ParentName, 6409 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6410 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6411 assert(!ParentName.empty() && "Invalid target region parent name!"); 6412 HasEmittedTargetRegion = true; 6413 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6414 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6415 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6416 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6417 if (!D.AllocatorTraits) 6418 continue; 6419 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6420 } 6421 } 6422 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6423 CodeGen.setAction(UsesAllocatorAction); 6424 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6425 IsOffloadEntry, CodeGen); 6426 } 6427 6428 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6429 const Expr *Allocator, 6430 const Expr *AllocatorTraits) { 6431 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6432 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6433 // Use default memspace handle. 6434 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6435 llvm::Value *NumTraits = llvm::ConstantInt::get( 6436 CGF.IntTy, cast<ConstantArrayType>( 6437 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6438 ->getSize() 6439 .getLimitedValue()); 6440 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6441 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6442 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 6443 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6444 AllocatorTraitsLVal.getBaseInfo(), 6445 AllocatorTraitsLVal.getTBAAInfo()); 6446 llvm::Value *Traits = 6447 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6448 6449 llvm::Value *AllocatorVal = 6450 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6451 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6452 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6453 // Store to allocator. 
6454 CGF.EmitVarDecl(*cast<VarDecl>( 6455 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6456 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6457 AllocatorVal = 6458 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6459 Allocator->getType(), Allocator->getExprLoc()); 6460 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6461 } 6462 6463 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6464 const Expr *Allocator) { 6465 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6466 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6467 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6468 llvm::Value *AllocatorVal = 6469 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6470 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6471 CGF.getContext().VoidPtrTy, 6472 Allocator->getExprLoc()); 6473 (void)CGF.EmitRuntimeCall( 6474 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6475 OMPRTL___kmpc_destroy_allocator), 6476 {ThreadId, AllocatorVal}); 6477 } 6478 6479 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6480 const OMPExecutableDirective &D, StringRef ParentName, 6481 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6482 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6483 // Create a unique name for the entry function using the source location 6484 // information of the current target region. The name will be something like: 6485 // 6486 // __omp_offloading_DD_FFFF_PP_lBB 6487 // 6488 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6489 // mangled name of the function that encloses the target region and BB is the 6490 // line number of the target region. 
const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  // BuildOutlinedFn is false only when compiling for the host with mandatory
  // offloading; in that case no outlined function is generated below.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The region is emitted with a CodeGenFunction of its own; CGInfo is
  // installed as its captured-statement info for the rest of this function.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // On the device BuildOutlinedFn is always true, so OutlinedFn was assigned
    // above.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host: the ID is a constant, zero-initialized i8 global named after the
    // entry function with a "region_id" suffix.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    // Stand-in for the missing function: an internal, constant i8 global that
    // carries the entry function name.
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
6557 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6558 DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, 6559 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6560 6561 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6562 int32_t DefaultValTeams = -1; 6563 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6564 if (DefaultValTeams > 0 && OutlinedFn) { 6565 OutlinedFn->addFnAttr("omp_target_num_teams", 6566 std::to_string(DefaultValTeams)); 6567 } 6568 int32_t DefaultValThreads = -1; 6569 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6570 if (DefaultValThreads > 0 && OutlinedFn) { 6571 OutlinedFn->addFnAttr("omp_target_thread_limit", 6572 std::to_string(DefaultValThreads)); 6573 } 6574 6575 if (BuildOutlinedFn) 6576 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6577 } 6578 6579 /// Checks if the expression is constant or does not have non-trivial function 6580 /// calls. 6581 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6582 // We can skip constant expressions. 6583 // We can skip expressions with trivial calls or simple expressions. 6584 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6585 !E->hasNonTrivialCall(Ctx)) && 6586 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6587 } 6588 6589 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6590 const Stmt *Body) { 6591 const Stmt *Child = Body->IgnoreContainers(); 6592 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6593 Child = nullptr; 6594 for (const Stmt *S : C->body()) { 6595 if (const auto *E = dyn_cast<Expr>(S)) { 6596 if (isTrivial(Ctx, E)) 6597 continue; 6598 } 6599 // Some of the statements can be ignored. 6600 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6601 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6602 continue; 6603 // Analyze declarations. 
6604 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6605 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6606 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6607 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6608 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6609 isa<UsingDirectiveDecl>(D) || 6610 isa<OMPDeclareReductionDecl>(D) || 6611 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6612 return true; 6613 const auto *VD = dyn_cast<VarDecl>(D); 6614 if (!VD) 6615 return false; 6616 return VD->hasGlobalStorage() || !VD->isUsed(); 6617 })) 6618 continue; 6619 } 6620 // Found multiple children - cannot get the one child only. 6621 if (Child) 6622 return nullptr; 6623 Child = S; 6624 } 6625 if (Child) 6626 Child = Child->IgnoreContainers(); 6627 } 6628 return Child; 6629 } 6630 6631 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6632 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6633 int32_t &DefaultVal) { 6634 6635 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6636 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6637 "Expected target-based executable directive."); 6638 switch (DirectiveKind) { 6639 case OMPD_target: { 6640 const auto *CS = D.getInnermostCapturedStmt(); 6641 const auto *Body = 6642 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6643 const Stmt *ChildStmt = 6644 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6645 if (const auto *NestedDir = 6646 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6647 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6648 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6649 const Expr *NumTeams = 6650 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6651 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6652 if (auto Constant = 6653 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6654 DefaultVal = Constant->getExtValue(); 6655 return NumTeams; 6656 } 6657 DefaultVal 
= 0; 6658 return nullptr; 6659 } 6660 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6661 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6662 DefaultVal = 1; 6663 return nullptr; 6664 } 6665 DefaultVal = 1; 6666 return nullptr; 6667 } 6668 // A value of -1 is used to check if we need to emit no teams region 6669 DefaultVal = -1; 6670 return nullptr; 6671 } 6672 case OMPD_target_teams: 6673 case OMPD_target_teams_distribute: 6674 case OMPD_target_teams_distribute_simd: 6675 case OMPD_target_teams_distribute_parallel_for: 6676 case OMPD_target_teams_distribute_parallel_for_simd: { 6677 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6678 const Expr *NumTeams = 6679 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6680 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6681 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6682 DefaultVal = Constant->getExtValue(); 6683 return NumTeams; 6684 } 6685 DefaultVal = 0; 6686 return nullptr; 6687 } 6688 case OMPD_target_parallel: 6689 case OMPD_target_parallel_for: 6690 case OMPD_target_parallel_for_simd: 6691 case OMPD_target_simd: 6692 DefaultVal = 1; 6693 return nullptr; 6694 case OMPD_parallel: 6695 case OMPD_for: 6696 case OMPD_parallel_for: 6697 case OMPD_parallel_master: 6698 case OMPD_parallel_sections: 6699 case OMPD_for_simd: 6700 case OMPD_parallel_for_simd: 6701 case OMPD_cancel: 6702 case OMPD_cancellation_point: 6703 case OMPD_ordered: 6704 case OMPD_threadprivate: 6705 case OMPD_allocate: 6706 case OMPD_task: 6707 case OMPD_simd: 6708 case OMPD_tile: 6709 case OMPD_unroll: 6710 case OMPD_sections: 6711 case OMPD_section: 6712 case OMPD_single: 6713 case OMPD_master: 6714 case OMPD_critical: 6715 case OMPD_taskyield: 6716 case OMPD_barrier: 6717 case OMPD_taskwait: 6718 case OMPD_taskgroup: 6719 case OMPD_atomic: 6720 case OMPD_flush: 6721 case OMPD_depobj: 6722 case OMPD_scan: 6723 case OMPD_teams: 6724 case OMPD_target_data: 6725 case 
OMPD_target_exit_data: 6726 case OMPD_target_enter_data: 6727 case OMPD_distribute: 6728 case OMPD_distribute_simd: 6729 case OMPD_distribute_parallel_for: 6730 case OMPD_distribute_parallel_for_simd: 6731 case OMPD_teams_distribute: 6732 case OMPD_teams_distribute_simd: 6733 case OMPD_teams_distribute_parallel_for: 6734 case OMPD_teams_distribute_parallel_for_simd: 6735 case OMPD_target_update: 6736 case OMPD_declare_simd: 6737 case OMPD_declare_variant: 6738 case OMPD_begin_declare_variant: 6739 case OMPD_end_declare_variant: 6740 case OMPD_declare_target: 6741 case OMPD_end_declare_target: 6742 case OMPD_declare_reduction: 6743 case OMPD_declare_mapper: 6744 case OMPD_taskloop: 6745 case OMPD_taskloop_simd: 6746 case OMPD_master_taskloop: 6747 case OMPD_master_taskloop_simd: 6748 case OMPD_parallel_master_taskloop: 6749 case OMPD_parallel_master_taskloop_simd: 6750 case OMPD_requires: 6751 case OMPD_metadirective: 6752 case OMPD_unknown: 6753 break; 6754 default: 6755 break; 6756 } 6757 llvm_unreachable("Unexpected directive kind."); 6758 } 6759 6760 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6761 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6762 assert(!CGF.getLangOpts().OpenMPIsDevice && 6763 "Clauses associated with the teams directive expected to be emitted " 6764 "only for the host!"); 6765 CGBuilderTy &Bld = CGF.Builder; 6766 int32_t DefaultNT = -1; 6767 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6768 if (NumTeams != nullptr) { 6769 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6770 6771 switch (DirectiveKind) { 6772 case OMPD_target: { 6773 const auto *CS = D.getInnermostCapturedStmt(); 6774 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6775 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6776 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6777 /*IgnoreResultAssign*/ true); 6778 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6779 /*isSigned=*/true); 6780 } 
case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // The clause belongs to the directive itself: evaluate it inside a
      // cleanup scope and convert the result to a signed i32.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No expression and the -1 default: no value is emitted at all.
    return nullptr;
  }

  // Otherwise the default computed above is emitted as an i32 constant.
  return Bld.getInt32(DefaultNT);
}

/// Emits the number of threads implied by the single directive nested in the
/// captured statement \p CS (located with getSingleCompoundChild).
///  - Nested parallel directive: the value of its num_threads clause as an
///    i32, replaced by \p DefaultThreadLimitVal when that is non-null and
///    smaller (unsigned comparison); without a num_threads clause,
///    \p DefaultThreadLimitVal or the constant 0. An applicable if clause
///    (no name modifier, or 'parallel') turns the result into
///    <cond> ? <threads> : 1, or into the constant 1 when it folds to false.
///  - Nested simd directive that is not a parallel directive: the constant 1.
///  - Any other nested directive: \p DefaultThreadLimitVal, possibly null.
///  - No single nested directive: \p DefaultThreadLimitVal, or the constant 0
///    when it is null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first if clause that applies to 'parallel': one without a
        // name modifier or with the 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: false means exactly one thread,
            // true leaves CondVal null so no select is emitted later.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition; declarations marked OMPCaptureNoInitAttr only get
            // their alloca and cleanups, without an initializer.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
6849 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6850 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6851 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6852 const auto *NumThreadsClause = 6853 Dir->getSingleClause<OMPNumThreadsClause>(); 6854 CodeGenFunction::LexicalScope Scope( 6855 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6856 if (const auto *PreInit = 6857 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6858 for (const auto *I : PreInit->decls()) { 6859 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6860 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6861 } else { 6862 CodeGenFunction::AutoVarEmission Emission = 6863 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6864 CGF.EmitAutoVarCleanups(Emission); 6865 } 6866 } 6867 } 6868 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6869 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6870 /*isSigned=*/false); 6871 if (DefaultThreadLimitVal) 6872 NumThreads = CGF.Builder.CreateSelect( 6873 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6874 DefaultThreadLimitVal, NumThreads); 6875 } else { 6876 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6877 : CGF.Builder.getInt32(0); 6878 } 6879 // Process condition of the if clause. 6880 if (CondVal) { 6881 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6882 CGF.Builder.getInt32(1)); 6883 } 6884 return NumThreads; 6885 } 6886 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6887 return CGF.Builder.getInt32(1); 6888 return DefaultThreadLimitVal; 6889 } 6890 return DefaultThreadLimitVal ? 
DefaultThreadLimitVal 6891 : CGF.Builder.getInt32(0); 6892 } 6893 6894 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6895 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6896 int32_t &DefaultVal) { 6897 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6898 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6899 "Expected target-based executable directive."); 6900 6901 switch (DirectiveKind) { 6902 case OMPD_target: 6903 // Teams have no clause thread_limit 6904 return nullptr; 6905 case OMPD_target_teams: 6906 case OMPD_target_teams_distribute: 6907 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6908 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6909 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6910 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6911 if (auto Constant = 6912 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6913 DefaultVal = Constant->getExtValue(); 6914 return ThreadLimit; 6915 } 6916 return nullptr; 6917 case OMPD_target_parallel: 6918 case OMPD_target_parallel_for: 6919 case OMPD_target_parallel_for_simd: 6920 case OMPD_target_teams_distribute_parallel_for: 6921 case OMPD_target_teams_distribute_parallel_for_simd: { 6922 Expr *ThreadLimit = nullptr; 6923 Expr *NumThreads = nullptr; 6924 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6925 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6926 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6927 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6928 if (auto Constant = 6929 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6930 DefaultVal = Constant->getExtValue(); 6931 } 6932 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6933 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6934 NumThreads = NumThreadsClause->getNumThreads(); 6935 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6936 if (auto Constant = 6937 
NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6938 if (Constant->getExtValue() < DefaultVal) { 6939 DefaultVal = Constant->getExtValue(); 6940 ThreadLimit = NumThreads; 6941 } 6942 } 6943 } 6944 } 6945 return ThreadLimit; 6946 } 6947 case OMPD_target_teams_distribute_simd: 6948 case OMPD_target_simd: 6949 DefaultVal = 1; 6950 return nullptr; 6951 case OMPD_parallel: 6952 case OMPD_for: 6953 case OMPD_parallel_for: 6954 case OMPD_parallel_master: 6955 case OMPD_parallel_sections: 6956 case OMPD_for_simd: 6957 case OMPD_parallel_for_simd: 6958 case OMPD_cancel: 6959 case OMPD_cancellation_point: 6960 case OMPD_ordered: 6961 case OMPD_threadprivate: 6962 case OMPD_allocate: 6963 case OMPD_task: 6964 case OMPD_simd: 6965 case OMPD_tile: 6966 case OMPD_unroll: 6967 case OMPD_sections: 6968 case OMPD_section: 6969 case OMPD_single: 6970 case OMPD_master: 6971 case OMPD_critical: 6972 case OMPD_taskyield: 6973 case OMPD_barrier: 6974 case OMPD_taskwait: 6975 case OMPD_taskgroup: 6976 case OMPD_atomic: 6977 case OMPD_flush: 6978 case OMPD_depobj: 6979 case OMPD_scan: 6980 case OMPD_teams: 6981 case OMPD_target_data: 6982 case OMPD_target_exit_data: 6983 case OMPD_target_enter_data: 6984 case OMPD_distribute: 6985 case OMPD_distribute_simd: 6986 case OMPD_distribute_parallel_for: 6987 case OMPD_distribute_parallel_for_simd: 6988 case OMPD_teams_distribute: 6989 case OMPD_teams_distribute_simd: 6990 case OMPD_teams_distribute_parallel_for: 6991 case OMPD_teams_distribute_parallel_for_simd: 6992 case OMPD_target_update: 6993 case OMPD_declare_simd: 6994 case OMPD_declare_variant: 6995 case OMPD_begin_declare_variant: 6996 case OMPD_end_declare_variant: 6997 case OMPD_declare_target: 6998 case OMPD_end_declare_target: 6999 case OMPD_declare_reduction: 7000 case OMPD_declare_mapper: 7001 case OMPD_taskloop: 7002 case OMPD_taskloop_simd: 7003 case OMPD_master_taskloop: 7004 case OMPD_master_taskloop_simd: 7005 case OMPD_parallel_master_taskloop: 7006 case 
OMPD_parallel_master_taskloop_simd: 7007 case OMPD_requires: 7008 case OMPD_unknown: 7009 break; 7010 default: 7011 break; 7012 } 7013 llvm_unreachable("Unsupported directive kind."); 7014 } 7015 7016 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7017 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7018 assert(!CGF.getLangOpts().OpenMPIsDevice && 7019 "Clauses associated with the teams directive expected to be emitted " 7020 "only for the host!"); 7021 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7022 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7023 "Expected target-based executable directive."); 7024 CGBuilderTy &Bld = CGF.Builder; 7025 llvm::Value *ThreadLimitVal = nullptr; 7026 llvm::Value *NumThreadsVal = nullptr; 7027 switch (DirectiveKind) { 7028 case OMPD_target: { 7029 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7030 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7031 return NumThreads; 7032 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7033 CGF.getContext(), CS->getCapturedStmt()); 7034 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7035 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7036 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7037 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7038 const auto *ThreadLimitClause = 7039 Dir->getSingleClause<OMPThreadLimitClause>(); 7040 CodeGenFunction::LexicalScope Scope( 7041 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7042 if (const auto *PreInit = 7043 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7044 for (const auto *I : PreInit->decls()) { 7045 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7046 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7047 } else { 7048 CodeGenFunction::AutoVarEmission Emission = 7049 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7050 CGF.EmitAutoVarCleanups(Emission); 7051 } 7052 } 7053 } 7054 llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( 7055 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7056 ThreadLimitVal = 7057 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7058 } 7059 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7060 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7061 CS = Dir->getInnermostCapturedStmt(); 7062 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7063 CGF.getContext(), CS->getCapturedStmt()); 7064 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7065 } 7066 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7067 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7068 CS = Dir->getInnermostCapturedStmt(); 7069 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7070 return NumThreads; 7071 } 7072 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7073 return Bld.getInt32(1); 7074 } 7075 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7076 } 7077 case OMPD_target_teams: { 7078 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7079 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7080 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7081 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7082 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7083 ThreadLimitVal = 7084 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7085 } 7086 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7087 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7088 return NumThreads; 7089 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7090 CGF.getContext(), CS->getCapturedStmt()); 7091 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7092 if (Dir->getDirectiveKind() == OMPD_distribute) { 7093 CS = Dir->getInnermostCapturedStmt(); 7094 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7095 return NumThreads; 7096 } 7097 } 7098 
return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    // Evaluate the directive's own thread_limit (if any) and let the nested
    // directive decide, with that limit as the default.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first if clause that applies to 'parallel': one without a
      // name modifier or with the 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant condition: false means exactly one thread; true leaves
          // CondVal null so no select is emitted at the end.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present the smaller value (unsigned comparison) is
      // used; with num_threads alone, its value.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: fall back to the constant 0.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The kinds below are rejected by the assertion at the top of the function;
  // they all fall through to the llvm_unreachable after the switch.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
7268 OMP_MAP_RETURN_PARAM = 0x40, 7269 /// This flag signals that the reference being passed is a pointer to 7270 /// private data. 7271 OMP_MAP_PRIVATE = 0x80, 7272 /// Pass the element to the device by value. 7273 OMP_MAP_LITERAL = 0x100, 7274 /// Implicit map 7275 OMP_MAP_IMPLICIT = 0x200, 7276 /// Close is a hint to the runtime to allocate memory close to 7277 /// the target device. 7278 OMP_MAP_CLOSE = 0x400, 7279 /// 0x800 is reserved for compatibility with XLC. 7280 /// Produce a runtime error if the data is not already allocated. 7281 OMP_MAP_PRESENT = 0x1000, 7282 // Increment and decrement a separate reference counter so that the data 7283 // cannot be unmapped within the associated region. Thus, this flag is 7284 // intended to be used on 'target' and 'target data' directives because they 7285 // are inherently structured. It is not intended to be used on 'target 7286 // enter data' and 'target exit data' directives because they are inherently 7287 // dynamic. 7288 // This is an OpenMP extension for the sake of OpenACC support. 7289 OMP_MAP_OMPX_HOLD = 0x2000, 7290 /// Signal that the runtime library should use args as an array of 7291 /// descriptor_dim pointers and use args_size as dims. Used when we have 7292 /// non-contiguous list items in target update directive 7293 OMP_MAP_NON_CONTIG = 0x100000000000, 7294 /// The 16 MSBs of the flags indicate whether the entry is member of some 7295 /// struct/class. 7296 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7297 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7298 }; 7299 7300 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7301 static unsigned getFlagMemberOffset() { 7302 unsigned Offset = 0; 7303 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7304 Remain = Remain >> 1) 7305 Offset++; 7306 return Offset; 7307 } 7308 7309 /// Class that holds debugging information for a data mapping to be passed to 7310 /// the runtime library. 
7311 class MappingExprInfo { 7312 /// The variable declaration used for the data mapping. 7313 const ValueDecl *MapDecl = nullptr; 7314 /// The original expression used in the map clause, or null if there is 7315 /// none. 7316 const Expr *MapExpr = nullptr; 7317 7318 public: 7319 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7320 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7321 7322 const ValueDecl *getMapDecl() const { return MapDecl; } 7323 const Expr *getMapExpr() const { return MapExpr; } 7324 }; 7325 7326 /// Class that associates information with a base pointer to be passed to the 7327 /// runtime library. 7328 class BasePointerInfo { 7329 /// The base pointer. 7330 llvm::Value *Ptr = nullptr; 7331 /// The base declaration that refers to this device pointer, or null if 7332 /// there is none. 7333 const ValueDecl *DevPtrDecl = nullptr; 7334 7335 public: 7336 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7337 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7338 llvm::Value *operator*() const { return Ptr; } 7339 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7340 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7341 }; 7342 7343 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7344 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7345 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7346 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7347 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7348 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7349 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7350 7351 /// This structure contains combined information generated for mappable 7352 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7353 /// mappers, and non-contiguous information. 
7354 struct MapCombinedInfoTy { 7355 struct StructNonContiguousInfo { 7356 bool IsNonContiguous = false; 7357 MapDimArrayTy Dims; 7358 MapNonContiguousArrayTy Offsets; 7359 MapNonContiguousArrayTy Counts; 7360 MapNonContiguousArrayTy Strides; 7361 }; 7362 MapExprsArrayTy Exprs; 7363 MapBaseValuesArrayTy BasePointers; 7364 MapValuesArrayTy Pointers; 7365 MapValuesArrayTy Sizes; 7366 MapFlagsArrayTy Types; 7367 MapMappersArrayTy Mappers; 7368 StructNonContiguousInfo NonContigInfo; 7369 7370 /// Append arrays in \a CurInfo. 7371 void append(MapCombinedInfoTy &CurInfo) { 7372 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7373 BasePointers.append(CurInfo.BasePointers.begin(), 7374 CurInfo.BasePointers.end()); 7375 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7376 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7377 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7378 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7379 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7380 CurInfo.NonContigInfo.Dims.end()); 7381 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7382 CurInfo.NonContigInfo.Offsets.end()); 7383 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7384 CurInfo.NonContigInfo.Counts.end()); 7385 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7386 CurInfo.NonContigInfo.Strides.end()); 7387 } 7388 }; 7389 7390 /// Map between a struct and the its lowest & highest elements which have been 7391 /// mapped. 
7392 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7393 /// HE(FieldIndex, Pointer)} 7394 struct StructRangeInfoTy { 7395 MapCombinedInfoTy PreliminaryMapData; 7396 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7397 0, Address::invalid()}; 7398 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7399 0, Address::invalid()}; 7400 Address Base = Address::invalid(); 7401 Address LB = Address::invalid(); 7402 bool IsArraySection = false; 7403 bool HasCompleteRecord = false; 7404 }; 7405 7406 private: 7407 /// Kind that defines how a device pointer has to be returned. 7408 struct MapInfo { 7409 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7410 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7411 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7412 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7413 bool ReturnDevicePointer = false; 7414 bool IsImplicit = false; 7415 const ValueDecl *Mapper = nullptr; 7416 const Expr *VarRef = nullptr; 7417 bool ForDeviceAddr = false; 7418 7419 MapInfo() = default; 7420 MapInfo( 7421 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7422 OpenMPMapClauseKind MapType, 7423 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7424 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7425 bool ReturnDevicePointer, bool IsImplicit, 7426 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7427 bool ForDeviceAddr = false) 7428 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7429 MotionModifiers(MotionModifiers), 7430 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7431 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7432 }; 7433 7434 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7435 /// member and there is no map information about it, then emission of that 7436 /// entry is deferred until the whole struct has been processed. 
7437 struct DeferredDevicePtrEntryTy { 7438 const Expr *IE = nullptr; 7439 const ValueDecl *VD = nullptr; 7440 bool ForDeviceAddr = false; 7441 7442 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7443 bool ForDeviceAddr) 7444 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7445 }; 7446 7447 /// The target directive from where the mappable clauses were extracted. It 7448 /// is either a executable directive or a user-defined mapper directive. 7449 llvm::PointerUnion<const OMPExecutableDirective *, 7450 const OMPDeclareMapperDecl *> 7451 CurDir; 7452 7453 /// Function the directive is being generated for. 7454 CodeGenFunction &CGF; 7455 7456 /// Set of all first private variables in the current directive. 7457 /// bool data is set to true if the variable is implicitly marked as 7458 /// firstprivate, false otherwise. 7459 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7460 7461 /// Map between device pointer declarations and their expression components. 7462 /// The key value for declarations in 'this' is null. 7463 llvm::DenseMap< 7464 const ValueDecl *, 7465 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7466 DevPointersMap; 7467 7468 /// Map between lambda declarations and their map type. 7469 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; 7470 7471 llvm::Value *getExprTypeSize(const Expr *E) const { 7472 QualType ExprTy = E->getType().getCanonicalType(); 7473 7474 // Calculate the size for array shaping expression. 
7475 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7476 llvm::Value *Size = 7477 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7478 for (const Expr *SE : OAE->getDimensions()) { 7479 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7480 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7481 CGF.getContext().getSizeType(), 7482 SE->getExprLoc()); 7483 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7484 } 7485 return Size; 7486 } 7487 7488 // Reference types are ignored for mapping purposes. 7489 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7490 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7491 7492 // Given that an array section is considered a built-in type, we need to 7493 // do the calculation based on the length of the section instead of relying 7494 // on CGF.getTypeSize(E->getType()). 7495 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7496 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7497 OAE->getBase()->IgnoreParenImpCasts()) 7498 .getCanonicalType(); 7499 7500 // If there is no length associated with the expression and lower bound is 7501 // not specified too, that means we are using the whole length of the 7502 // base. 7503 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7504 !OAE->getLowerBound()) 7505 return CGF.getTypeSize(BaseTy); 7506 7507 llvm::Value *ElemSize; 7508 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7509 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7510 } else { 7511 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7512 assert(ATy && "Expecting array type if not a pointer type."); 7513 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7514 } 7515 7516 // If we don't have a length at this point, that is because we have an 7517 // array section with a single element. 
7518 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7519 return ElemSize; 7520 7521 if (const Expr *LenExpr = OAE->getLength()) { 7522 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7523 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7524 CGF.getContext().getSizeType(), 7525 LenExpr->getExprLoc()); 7526 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7527 } 7528 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7529 OAE->getLowerBound() && "expected array_section[lb:]."); 7530 // Size = sizetype - lb * elemtype; 7531 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7532 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7533 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7534 CGF.getContext().getSizeType(), 7535 OAE->getLowerBound()->getExprLoc()); 7536 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7537 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7538 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7539 LengthVal = CGF.Builder.CreateSelect( 7540 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7541 return LengthVal; 7542 } 7543 return CGF.getTypeSize(ExprTy); 7544 } 7545 7546 /// Return the corresponding bits for a given map clause modifier. Add 7547 /// a flag marking the map as a pointer if requested. Add a flag marking the 7548 /// map as the first one of a series of maps that relate to the same map 7549 /// expression. 7550 OpenMPOffloadMappingFlags getMapTypeBits( 7551 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7552 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7553 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7554 OpenMPOffloadMappingFlags Bits = 7555 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7556 switch (MapType) { 7557 case OMPC_MAP_alloc: 7558 case OMPC_MAP_release: 7559 // alloc and release is the default behavior in the runtime library, i.e. 7560 // if we don't pass any bits alloc/release that is what the runtime is 7561 // going to do. Therefore, we don't need to signal anything for these two 7562 // type modifiers. 7563 break; 7564 case OMPC_MAP_to: 7565 Bits |= OMP_MAP_TO; 7566 break; 7567 case OMPC_MAP_from: 7568 Bits |= OMP_MAP_FROM; 7569 break; 7570 case OMPC_MAP_tofrom: 7571 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7572 break; 7573 case OMPC_MAP_delete: 7574 Bits |= OMP_MAP_DELETE; 7575 break; 7576 case OMPC_MAP_unknown: 7577 llvm_unreachable("Unexpected map type!"); 7578 } 7579 if (AddPtrFlag) 7580 Bits |= OMP_MAP_PTR_AND_OBJ; 7581 if (AddIsTargetParamFlag) 7582 Bits |= OMP_MAP_TARGET_PARAM; 7583 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) 7584 Bits |= OMP_MAP_ALWAYS; 7585 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) 7586 Bits |= OMP_MAP_CLOSE; 7587 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || 7588 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) 7589 Bits |= OMP_MAP_PRESENT; 7590 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) 7591 Bits |= OMP_MAP_OMPX_HOLD; 7592 if (IsNonContiguous) 7593 Bits |= OMP_MAP_NON_CONTIG; 7594 return Bits; 7595 } 7596 7597 /// Return true if the provided expression is a final array section. A 7598 /// final array section, is one whose length can't be proved to be one. 7599 bool isFinalArraySectionExpression(const Expr *E) const { 7600 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7601 7602 // It is not an array section and therefore not a unity-size one. 7603 if (!OASE) 7604 return false; 7605 7606 // An array section with no colon always refer to a single element. 
7607 if (OASE->getColonLocFirst().isInvalid()) 7608 return false; 7609 7610 const Expr *Length = OASE->getLength(); 7611 7612 // If we don't have a length we have to check if the array has size 1 7613 // for this dimension. Also, we should always expect a length if the 7614 // base type is pointer. 7615 if (!Length) { 7616 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7617 OASE->getBase()->IgnoreParenImpCasts()) 7618 .getCanonicalType(); 7619 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7620 return ATy->getSize().getSExtValue() != 1; 7621 // If we don't have a constant dimension length, we have to consider 7622 // the current section as having any size, so it is not necessarily 7623 // unitary. If it happen to be unity size, that's user fault. 7624 return true; 7625 } 7626 7627 // Check if the length evaluates to 1. 7628 Expr::EvalResult Result; 7629 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7630 return true; // Can have more that size 1. 7631 7632 llvm::APSInt ConstLength = Result.Val.getInt(); 7633 return ConstLength.getSExtValue() != 1; 7634 } 7635 7636 /// Generate the base pointers, section pointers, sizes, map type bits, and 7637 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7638 /// map type, map or motion modifiers, and expression components. 7639 /// \a IsFirstComponent should be set to true if the provided set of 7640 /// components is the first associated with a capture. 
7641 void generateInfoForComponentList( 7642 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7643 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7644 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7645 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7646 bool IsFirstComponentList, bool IsImplicit, 7647 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7648 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7649 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7650 OverlappedElements = llvm::None) const { 7651 // The following summarizes what has to be generated for each map and the 7652 // types below. The generated information is expressed in this order: 7653 // base pointer, section pointer, size, flags 7654 // (to add to the ones that come from the map type and modifier). 7655 // 7656 // double d; 7657 // int i[100]; 7658 // float *p; 7659 // 7660 // struct S1 { 7661 // int i; 7662 // float f[50]; 7663 // } 7664 // struct S2 { 7665 // int i; 7666 // float f[50]; 7667 // S1 s; 7668 // double *p; 7669 // struct S2 *ps; 7670 // int &ref; 7671 // } 7672 // S2 s; 7673 // S2 *ps; 7674 // 7675 // map(d) 7676 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7677 // 7678 // map(i) 7679 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7680 // 7681 // map(i[1:23]) 7682 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7683 // 7684 // map(p) 7685 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7686 // 7687 // map(p[1:24]) 7688 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7689 // in unified shared memory mode or for local pointers 7690 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7691 // 7692 // map(s) 7693 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7694 // 7695 // map(s.i) 7696 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7697 // 7698 // map(s.s.f) 7699 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7700 // 7701 // map(s.p) 7702 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7703 // 7704 // map(to: s.p[:22]) 7705 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7706 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7707 // &(s.p), &(s.p[0]), 22*sizeof(double), 7708 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7709 // (*) alloc space for struct members, only this is a target parameter 7710 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7711 // optimizes this entry out, same in the examples below) 7712 // (***) map the pointee (map: to) 7713 // 7714 // map(to: s.ref) 7715 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7716 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7717 // (*) alloc space for struct members, only this is a target parameter 7718 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7719 // optimizes this entry out, same in the examples below) 7720 // (***) map the pointee (map: to) 7721 // 7722 // map(s.ps) 7723 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7724 // 7725 // map(from: s.ps->s.i) 7726 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7727 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7728 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7729 // 7730 // map(to: s.ps->ps) 7731 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7732 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7733 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7734 // 7735 // map(s.ps->ps->ps) 7736 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7737 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7738 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7739 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7740 // 7741 // map(to: s.ps->ps->s.f[:22]) 7742 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7743 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7744 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7745 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7746 // 7747 // map(ps) 7748 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7749 // 7750 // map(ps->i) 7751 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7752 // 7753 // map(ps->s.f) 7754 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7755 // 7756 // map(from: ps->p) 7757 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7758 // 7759 // map(to: ps->p[:22]) 7760 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7761 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7762 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7763 // 7764 // map(ps->ps) 7765 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7766 // 7767 // map(from: ps->ps->s.i) 7768 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7769 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7770 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7771 // 7772 // map(from: ps->ps->ps) 7773 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7774 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7775 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7776 // 7777 // map(ps->ps->ps->ps) 7778 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7779 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7780 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7781 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7782 // 7783 // map(to: ps->ps->ps->s.f[:22]) 7784 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7785 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7786 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7787 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7788 // 7789 // map(to: s.f[:22]) map(from: s.p[:33]) 7790 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7791 // sizeof(double*) (**), TARGET_PARAM 7792 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7793 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7794 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7795 // (*) allocate contiguous space needed to fit all mapped members even if 7796 // we allocate space for members not mapped (in this example, 7797 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7798 // them as well because they fall between &s.f[0] and &s.p) 7799 // 7800 // map(from: s.f[:22]) map(to: ps->p[:33]) 7801 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7802 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7803 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7804 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7805 // (*) the struct this entry pertains to is the 2nd element in the list of 7806 // arguments, hence MEMBER_OF(2) 7807 // 7808 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7809 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7810 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7811 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7812 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7813 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7814 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7815 // (*) the struct this entry pertains to is the 4th element in the list 7816 // of arguments, hence MEMBER_OF(4) 7817 7818 // Track if the map information being generated is the first for a capture. 7819 bool IsCaptureFirstInfo = IsFirstComponentList; 7820 // When the variable is on a declare target link or in a to clause with 7821 // unified memory, a reference is needed to hold the host/device address 7822 // of the variable. 7823 bool RequiresReference = false; 7824 7825 // Scan the components from the base to the complete expression. 
7826 auto CI = Components.rbegin(); 7827 auto CE = Components.rend(); 7828 auto I = CI; 7829 7830 // Track if the map information being generated is the first for a list of 7831 // components. 7832 bool IsExpressionFirstInfo = true; 7833 bool FirstPointerInComplexData = false; 7834 Address BP = Address::invalid(); 7835 const Expr *AssocExpr = I->getAssociatedExpression(); 7836 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7837 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7838 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7839 7840 if (isa<MemberExpr>(AssocExpr)) { 7841 // The base is the 'this' pointer. The content of the pointer is going 7842 // to be the base of the field being mapped. 7843 BP = CGF.LoadCXXThisAddress(); 7844 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7845 (OASE && 7846 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7847 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7848 } else if (OAShE && 7849 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7850 BP = Address( 7851 CGF.EmitScalarExpr(OAShE->getBase()), 7852 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7853 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7854 } else { 7855 // The base is the reference to the variable. 7856 // BP = &Var. 
7857 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7858 if (const auto *VD = 7859 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7860 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7861 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7862 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7863 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7864 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7865 RequiresReference = true; 7866 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7867 } 7868 } 7869 } 7870 7871 // If the variable is a pointer and is being dereferenced (i.e. is not 7872 // the last component), the base has to be the pointer itself, not its 7873 // reference. References are ignored for mapping purposes. 7874 QualType Ty = 7875 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7876 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7877 // No need to generate individual map information for the pointer, it 7878 // can be associated with the combined storage if shared memory mode is 7879 // active or the base declaration is not global variable. 7880 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7881 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7882 !VD || VD->hasLocalStorage()) 7883 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7884 else 7885 FirstPointerInComplexData = true; 7886 ++I; 7887 } 7888 } 7889 7890 // Track whether a component of the list should be marked as MEMBER_OF some 7891 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7892 // in a component list should be marked as MEMBER_OF, all subsequent entries 7893 // do not belong to the base struct. E.g. 7894 // struct S2 s; 7895 // s.ps->ps->ps->f[:] 7896 // (1) (2) (3) (4) 7897 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7898 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. 
ps(3) 7899 // is the pointee of ps(2) which is not member of struct s, so it should not 7900 // be marked as such (it is still PTR_AND_OBJ). 7901 // The variable is initialized to false so that PTR_AND_OBJ entries which 7902 // are not struct members are not considered (e.g. array of pointers to 7903 // data). 7904 bool ShouldBeMemberOf = false; 7905 7906 // Variable keeping track of whether or not we have encountered a component 7907 // in the component list which is a member expression. Useful when we have a 7908 // pointer or a final array section, in which case it is the previous 7909 // component in the list which tells us whether we have a member expression. 7910 // E.g. X.f[:] 7911 // While processing the final array section "[:]" it is "f" which tells us 7912 // whether we are dealing with a member of a declared struct. 7913 const MemberExpr *EncounteredME = nullptr; 7914 7915 // Track for the total number of dimension. Start from one for the dummy 7916 // dimension. 7917 uint64_t DimSize = 1; 7918 7919 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7920 bool IsPrevMemberReference = false; 7921 7922 for (; I != CE; ++I) { 7923 // If the current component is member of a struct (parent struct) mark it. 7924 if (!EncounteredME) { 7925 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7926 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7927 // as MEMBER_OF the parent struct. 7928 if (EncounteredME) { 7929 ShouldBeMemberOf = true; 7930 // Do not emit as complex pointer if this is actually not array-like 7931 // expression. 
7932 if (FirstPointerInComplexData) { 7933 QualType Ty = std::prev(I) 7934 ->getAssociatedDeclaration() 7935 ->getType() 7936 .getNonReferenceType(); 7937 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7938 FirstPointerInComplexData = false; 7939 } 7940 } 7941 } 7942 7943 auto Next = std::next(I); 7944 7945 // We need to generate the addresses and sizes if this is the last 7946 // component, if the component is a pointer or if it is an array section 7947 // whose length can't be proved to be one. If this is a pointer, it 7948 // becomes the base address for the following components. 7949 7950 // A final array section, is one whose length can't be proved to be one. 7951 // If the map item is non-contiguous then we don't treat any array section 7952 // as final array section. 7953 bool IsFinalArraySection = 7954 !IsNonContiguous && 7955 isFinalArraySectionExpression(I->getAssociatedExpression()); 7956 7957 // If we have a declaration for the mapping use that, otherwise use 7958 // the base declaration of the map clause. 7959 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7960 ? I->getAssociatedDeclaration() 7961 : BaseDecl; 7962 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7963 : MapExpr; 7964 7965 // Get information on whether the element is a pointer. Have to do a 7966 // special treatment for array sections given that they are built-in 7967 // types. 
7968 const auto *OASE = 7969 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7970 const auto *OAShE = 7971 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7972 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7973 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7974 bool IsPointer = 7975 OAShE || 7976 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7977 .getCanonicalType() 7978 ->isAnyPointerType()) || 7979 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7980 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7981 MapDecl && 7982 MapDecl->getType()->isLValueReferenceType(); 7983 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7984 7985 if (OASE) 7986 ++DimSize; 7987 7988 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7989 IsFinalArraySection) { 7990 // If this is not the last component, we expect the pointer to be 7991 // associated with an array expression or member expression. 7992 assert((Next == CE || 7993 isa<MemberExpr>(Next->getAssociatedExpression()) || 7994 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7995 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7996 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7997 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7998 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7999 "Unexpected expression"); 8000 8001 Address LB = Address::invalid(); 8002 Address LowestElem = Address::invalid(); 8003 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8004 const MemberExpr *E) { 8005 const Expr *BaseExpr = E->getBase(); 8006 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8007 // scalar. 
8008 LValue BaseLV; 8009 if (E->isArrow()) { 8010 LValueBaseInfo BaseInfo; 8011 TBAAAccessInfo TBAAInfo; 8012 Address Addr = 8013 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8014 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8015 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8016 } else { 8017 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8018 } 8019 return BaseLV; 8020 }; 8021 if (OAShE) { 8022 LowestElem = LB = 8023 Address(CGF.EmitScalarExpr(OAShE->getBase()), 8024 CGF.ConvertTypeForMem( 8025 OAShE->getBase()->getType()->getPointeeType()), 8026 CGF.getContext().getTypeAlignInChars( 8027 OAShE->getBase()->getType())); 8028 } else if (IsMemberReference) { 8029 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8030 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8031 LowestElem = CGF.EmitLValueForFieldInitialization( 8032 BaseLVal, cast<FieldDecl>(MapDecl)) 8033 .getAddress(CGF); 8034 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8035 .getAddress(CGF); 8036 } else { 8037 LowestElem = LB = 8038 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8039 .getAddress(CGF); 8040 } 8041 8042 // If this component is a pointer inside the base struct then we don't 8043 // need to create any entry for it - it will be combined with the object 8044 // it is pointing to into a single PTR_AND_OBJ entry. 8045 bool IsMemberPointerOrAddr = 8046 EncounteredME && 8047 (((IsPointer || ForDeviceAddr) && 8048 I->getAssociatedExpression() == EncounteredME) || 8049 (IsPrevMemberReference && !IsPointer) || 8050 (IsMemberReference && Next != CE && 8051 !Next->getAssociatedExpression()->getType()->isPointerType())); 8052 if (!OverlappedElements.empty() && Next == CE) { 8053 // Handle base element with the info for overlapped elements. 
8054 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8055 assert(!IsPointer && 8056 "Unexpected base element with the pointer type."); 8057 // Mark the whole struct as the struct that requires allocation on the 8058 // device. 8059 PartialStruct.LowestElem = {0, LowestElem}; 8060 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8061 I->getAssociatedExpression()->getType()); 8062 Address HB = CGF.Builder.CreateConstGEP( 8063 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8064 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 8065 TypeSize.getQuantity() - 1); 8066 PartialStruct.HighestElem = { 8067 std::numeric_limits<decltype( 8068 PartialStruct.HighestElem.first)>::max(), 8069 HB}; 8070 PartialStruct.Base = BP; 8071 PartialStruct.LB = LB; 8072 assert( 8073 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8074 "Overlapped elements must be used only once for the variable."); 8075 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8076 // Emit data for non-overlapped data. 8077 OpenMPOffloadMappingFlags Flags = 8078 OMP_MAP_MEMBER_OF | 8079 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8080 /*AddPtrFlag=*/false, 8081 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8082 llvm::Value *Size = nullptr; 8083 // Do bitcopy of all non-overlapped structure elements. 
8084 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8085 Component : OverlappedElements) { 8086 Address ComponentLB = Address::invalid(); 8087 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8088 Component) { 8089 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8090 const auto *FD = dyn_cast<FieldDecl>(VD); 8091 if (FD && FD->getType()->isLValueReferenceType()) { 8092 const auto *ME = 8093 cast<MemberExpr>(MC.getAssociatedExpression()); 8094 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8095 ComponentLB = 8096 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8097 .getAddress(CGF); 8098 } else { 8099 ComponentLB = 8100 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8101 .getAddress(CGF); 8102 } 8103 Size = CGF.Builder.CreatePtrDiff( 8104 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8105 CGF.EmitCastToVoidPtr(LB.getPointer())); 8106 break; 8107 } 8108 } 8109 assert(Size && "Failed to determine structure size"); 8110 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8111 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8112 CombinedInfo.Pointers.push_back(LB.getPointer()); 8113 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8114 Size, CGF.Int64Ty, /*isSigned=*/true)); 8115 CombinedInfo.Types.push_back(Flags); 8116 CombinedInfo.Mappers.push_back(nullptr); 8117 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8118 : 1); 8119 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8120 } 8121 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8122 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8123 CombinedInfo.Pointers.push_back(LB.getPointer()); 8124 Size = CGF.Builder.CreatePtrDiff( 8125 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8126 CGF.EmitCastToVoidPtr(LB.getPointer())); 8127 CombinedInfo.Sizes.push_back( 8128 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8129 CombinedInfo.Types.push_back(Flags); 8130 CombinedInfo.Mappers.push_back(nullptr); 8131 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8132 : 1); 8133 break; 8134 } 8135 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8136 if (!IsMemberPointerOrAddr || 8137 (Next == CE && MapType != OMPC_MAP_unknown)) { 8138 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8139 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8140 CombinedInfo.Pointers.push_back(LB.getPointer()); 8141 CombinedInfo.Sizes.push_back( 8142 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8143 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8144 : 1); 8145 8146 // If Mapper is valid, the last component inherits the mapper. 8147 bool HasMapper = Mapper && Next == CE; 8148 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8149 8150 // We need to add a pointer flag for each map that comes from the 8151 // same expression except for the first one. We also need to signal 8152 // this map is the first one that relates with the current capture 8153 // (there is a set of entries for each capture). 
8154 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8155 MapType, MapModifiers, MotionModifiers, IsImplicit, 8156 !IsExpressionFirstInfo || RequiresReference || 8157 FirstPointerInComplexData || IsMemberReference, 8158 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8159 8160 if (!IsExpressionFirstInfo || IsMemberReference) { 8161 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8162 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8163 if (IsPointer || (IsMemberReference && Next != CE)) 8164 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8165 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8166 8167 if (ShouldBeMemberOf) { 8168 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8169 // should be later updated with the correct value of MEMBER_OF. 8170 Flags |= OMP_MAP_MEMBER_OF; 8171 // From now on, all subsequent PTR_AND_OBJ entries should not be 8172 // marked as MEMBER_OF. 8173 ShouldBeMemberOf = false; 8174 } 8175 } 8176 8177 CombinedInfo.Types.push_back(Flags); 8178 } 8179 8180 // If we have encountered a member expression so far, keep track of the 8181 // mapped member. If the parent is "*this", then the value declaration 8182 // is nullptr. 
8183 if (EncounteredME) { 8184 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8185 unsigned FieldIndex = FD->getFieldIndex(); 8186 8187 // Update info about the lowest and highest elements for this struct 8188 if (!PartialStruct.Base.isValid()) { 8189 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8190 if (IsFinalArraySection) { 8191 Address HB = 8192 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8193 .getAddress(CGF); 8194 PartialStruct.HighestElem = {FieldIndex, HB}; 8195 } else { 8196 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8197 } 8198 PartialStruct.Base = BP; 8199 PartialStruct.LB = BP; 8200 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8201 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8202 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8203 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8204 } 8205 } 8206 8207 // Need to emit combined struct for array sections. 8208 if (IsFinalArraySection || IsNonContiguous) 8209 PartialStruct.IsArraySection = true; 8210 8211 // If we have a final array section, we are done with this expression. 8212 if (IsFinalArraySection) 8213 break; 8214 8215 // The pointer becomes the base for the next element. 8216 if (Next != CE) 8217 BP = IsMemberReference ? LowestElem : LB; 8218 8219 IsExpressionFirstInfo = false; 8220 IsCaptureFirstInfo = false; 8221 FirstPointerInComplexData = false; 8222 IsPrevMemberReference = IsMemberReference; 8223 } else if (FirstPointerInComplexData) { 8224 QualType Ty = Components.rbegin() 8225 ->getAssociatedDeclaration() 8226 ->getType() 8227 .getNonReferenceType(); 8228 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8229 FirstPointerInComplexData = false; 8230 } 8231 } 8232 // If ran into the whole component - allocate the space for the whole 8233 // record. 
8234 if (!EncounteredME) 8235 PartialStruct.HasCompleteRecord = true; 8236 8237 if (!IsNonContiguous) 8238 return; 8239 8240 const ASTContext &Context = CGF.getContext(); 8241 8242 // For supporting stride in array section, we need to initialize the first 8243 // dimension size as 1, first offset as 0, and first count as 1 8244 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8245 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8246 MapValuesArrayTy CurStrides; 8247 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8248 uint64_t ElementTypeSize; 8249 8250 // Collect Size information for each dimension and get the element size as 8251 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8252 // should be [10, 10] and the first stride is 4 btyes. 8253 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8254 Components) { 8255 const Expr *AssocExpr = Component.getAssociatedExpression(); 8256 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8257 8258 if (!OASE) 8259 continue; 8260 8261 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8262 auto *CAT = Context.getAsConstantArrayType(Ty); 8263 auto *VAT = Context.getAsVariableArrayType(Ty); 8264 8265 // We need all the dimension size except for the last dimension. 8266 assert((VAT || CAT || &Component == &*Components.begin()) && 8267 "Should be either ConstantArray or VariableArray if not the " 8268 "first Component"); 8269 8270 // Get element size if CurStrides is empty. 
8271 if (CurStrides.empty()) { 8272 const Type *ElementType = nullptr; 8273 if (CAT) 8274 ElementType = CAT->getElementType().getTypePtr(); 8275 else if (VAT) 8276 ElementType = VAT->getElementType().getTypePtr(); 8277 else 8278 assert(&Component == &*Components.begin() && 8279 "Only expect pointer (non CAT or VAT) when this is the " 8280 "first Component"); 8281 // If ElementType is null, then it means the base is a pointer 8282 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8283 // for next iteration. 8284 if (ElementType) { 8285 // For the case that having pointer as base, we need to remove one 8286 // level of indirection. 8287 if (&Component != &*Components.begin()) 8288 ElementType = ElementType->getPointeeOrArrayElementType(); 8289 ElementTypeSize = 8290 Context.getTypeSizeInChars(ElementType).getQuantity(); 8291 CurStrides.push_back( 8292 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8293 } 8294 } 8295 // Get dimension value except for the last dimension since we don't need 8296 // it. 8297 if (DimSizes.size() < Components.size() - 1) { 8298 if (CAT) 8299 DimSizes.push_back(llvm::ConstantInt::get( 8300 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8301 else if (VAT) 8302 DimSizes.push_back(CGF.Builder.CreateIntCast( 8303 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8304 /*IsSigned=*/false)); 8305 } 8306 } 8307 8308 // Skip the dummy dimension since we have already have its information. 8309 auto *DI = DimSizes.begin() + 1; 8310 // Product of dimension. 8311 llvm::Value *DimProd = 8312 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8313 8314 // Collect info for non-contiguous. Notice that offset, count, and stride 8315 // are only meaningful for array-section, so we insert a null for anything 8316 // other than array-section. 8317 // Also, the size of offset, count, and stride are not the same as 8318 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8319 // count, and stride are the same as the number of non-contiguous 8320 // declaration in target update to/from clause. 8321 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8322 Components) { 8323 const Expr *AssocExpr = Component.getAssociatedExpression(); 8324 8325 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8326 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8327 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8328 /*isSigned=*/false); 8329 CurOffsets.push_back(Offset); 8330 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8331 CurStrides.push_back(CurStrides.back()); 8332 continue; 8333 } 8334 8335 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8336 8337 if (!OASE) 8338 continue; 8339 8340 // Offset 8341 const Expr *OffsetExpr = OASE->getLowerBound(); 8342 llvm::Value *Offset = nullptr; 8343 if (!OffsetExpr) { 8344 // If offset is absent, then we just set it to zero. 8345 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8346 } else { 8347 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8348 CGF.Int64Ty, 8349 /*isSigned=*/false); 8350 } 8351 CurOffsets.push_back(Offset); 8352 8353 // Count 8354 const Expr *CountExpr = OASE->getLength(); 8355 llvm::Value *Count = nullptr; 8356 if (!CountExpr) { 8357 // In Clang, once a high dimension is an array section, we construct all 8358 // the lower dimension as array section, however, for case like 8359 // arr[0:2][2], Clang construct the inner dimension as an array section 8360 // but it actually is not in an array section form according to spec. 8361 if (!OASE->getColonLocFirst().isValid() && 8362 !OASE->getColonLocSecond().isValid()) { 8363 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8364 } else { 8365 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8366 // When the length is absent it defaults to ⌈(size − 8367 // lower-bound)/stride⌉, where size is the size of the array 8368 // dimension. 8369 const Expr *StrideExpr = OASE->getStride(); 8370 llvm::Value *Stride = 8371 StrideExpr 8372 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8373 CGF.Int64Ty, /*isSigned=*/false) 8374 : nullptr; 8375 if (Stride) 8376 Count = CGF.Builder.CreateUDiv( 8377 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8378 else 8379 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8380 } 8381 } else { 8382 Count = CGF.EmitScalarExpr(CountExpr); 8383 } 8384 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8385 CurCounts.push_back(Count); 8386 8387 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8388 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8389 // Offset Count Stride 8390 // D0 0 1 4 (int) <- dummy dimension 8391 // D1 0 2 8 (2 * (1) * 4) 8392 // D2 1 2 20 (1 * (1 * 5) * 4) 8393 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8394 const Expr *StrideExpr = OASE->getStride(); 8395 llvm::Value *Stride = 8396 StrideExpr 8397 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8398 CGF.Int64Ty, /*isSigned=*/false) 8399 : nullptr; 8400 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8401 if (Stride) 8402 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8403 else 8404 CurStrides.push_back(DimProd); 8405 if (DI != DimSizes.end()) 8406 ++DI; 8407 } 8408 8409 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8410 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8411 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8412 } 8413 8414 /// Return the adjusted map modifiers if the declaration a capture refers to 8415 /// appears in a first-private clause. This is expected to be used only with 8416 /// directives that start with 'target'. 
8417 MappableExprsHandler::OpenMPOffloadMappingFlags 8418 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8419 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8420 8421 // A first private variable captured by reference will use only the 8422 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8423 // declaration is known as first-private in this handler. 8424 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8425 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8426 return MappableExprsHandler::OMP_MAP_TO | 8427 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8428 return MappableExprsHandler::OMP_MAP_PRIVATE | 8429 MappableExprsHandler::OMP_MAP_TO; 8430 } 8431 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); 8432 if (I != LambdasMap.end()) 8433 // for map(to: lambda): using user specified map type. 8434 return getMapTypeBits( 8435 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), 8436 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), 8437 /*AddPtrFlag=*/false, 8438 /*AddIsTargetParamFlag=*/false, 8439 /*isNonContiguous=*/false); 8440 return MappableExprsHandler::OMP_MAP_TO | 8441 MappableExprsHandler::OMP_MAP_FROM; 8442 } 8443 8444 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8445 // Rotate by getFlagMemberOffset() bits. 8446 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8447 << getFlagMemberOffset()); 8448 } 8449 8450 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8451 OpenMPOffloadMappingFlags MemberOfFlag) { 8452 // If the entry is PTR_AND_OBJ but has not been marked with the special 8453 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8454 // marked as MEMBER_OF. 
8455 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8456 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8457 return; 8458 8459 // Reset the placeholder value to prepare the flag for the assignment of the 8460 // proper MEMBER_OF value. 8461 Flags &= ~OMP_MAP_MEMBER_OF; 8462 Flags |= MemberOfFlag; 8463 } 8464 8465 void getPlainLayout(const CXXRecordDecl *RD, 8466 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8467 bool AsBase) const { 8468 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8469 8470 llvm::StructType *St = 8471 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8472 8473 unsigned NumElements = St->getNumElements(); 8474 llvm::SmallVector< 8475 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8476 RecordLayout(NumElements); 8477 8478 // Fill bases. 8479 for (const auto &I : RD->bases()) { 8480 if (I.isVirtual()) 8481 continue; 8482 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8483 // Ignore empty bases. 8484 if (Base->isEmpty() || CGF.getContext() 8485 .getASTRecordLayout(Base) 8486 .getNonVirtualSize() 8487 .isZero()) 8488 continue; 8489 8490 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8491 RecordLayout[FieldIndex] = Base; 8492 } 8493 // Fill in virtual bases. 8494 for (const auto &I : RD->vbases()) { 8495 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8496 // Ignore empty bases. 8497 if (Base->isEmpty()) 8498 continue; 8499 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8500 if (RecordLayout[FieldIndex]) 8501 continue; 8502 RecordLayout[FieldIndex] = Base; 8503 } 8504 // Fill in all the fields. 8505 assert(!RD->isUnion() && "Unexpected union."); 8506 for (const auto *Field : RD->fields()) { 8507 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8508 // will fill in later.) 
8509 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8510 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8511 RecordLayout[FieldIndex] = Field; 8512 } 8513 } 8514 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8515 &Data : RecordLayout) { 8516 if (Data.isNull()) 8517 continue; 8518 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8519 getPlainLayout(Base, Layout, /*AsBase=*/true); 8520 else 8521 Layout.push_back(Data.get<const FieldDecl *>()); 8522 } 8523 } 8524 8525 /// Generate all the base pointers, section pointers, sizes, map types, and 8526 /// mappers for the extracted mappable expressions (all included in \a 8527 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8528 /// pair of the relevant declaration and index where it occurs is appended to 8529 /// the device pointers info array. 8530 void generateAllInfoForClauses( 8531 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8532 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8533 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8534 // We have to process the component lists that relate with the same 8535 // declaration in a single chunk so that we can generate the map flags 8536 // correctly. Therefore, we organize all lists in a map. 8537 enum MapKind { Present, Allocs, Other, Total }; 8538 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8539 SmallVector<SmallVector<MapInfo, 8>, 4>> 8540 Info; 8541 8542 // Helper function to fill the information map for the different supported 8543 // clauses. 
8544 auto &&InfoGen = 8545 [&Info, &SkipVarSet]( 8546 const ValueDecl *D, MapKind Kind, 8547 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8548 OpenMPMapClauseKind MapType, 8549 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8550 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8551 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8552 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8553 if (SkipVarSet.contains(D)) 8554 return; 8555 auto It = Info.find(D); 8556 if (It == Info.end()) 8557 It = Info 8558 .insert(std::make_pair( 8559 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8560 .first; 8561 It->second[Kind].emplace_back( 8562 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8563 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8564 }; 8565 8566 for (const auto *Cl : Clauses) { 8567 const auto *C = dyn_cast<OMPMapClause>(Cl); 8568 if (!C) 8569 continue; 8570 MapKind Kind = Other; 8571 if (llvm::is_contained(C->getMapTypeModifiers(), 8572 OMPC_MAP_MODIFIER_present)) 8573 Kind = Present; 8574 else if (C->getMapType() == OMPC_MAP_alloc) 8575 Kind = Allocs; 8576 const auto *EI = C->getVarRefs().begin(); 8577 for (const auto L : C->component_lists()) { 8578 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8579 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8580 C->getMapTypeModifiers(), llvm::None, 8581 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8582 E); 8583 ++EI; 8584 } 8585 } 8586 for (const auto *Cl : Clauses) { 8587 const auto *C = dyn_cast<OMPToClause>(Cl); 8588 if (!C) 8589 continue; 8590 MapKind Kind = Other; 8591 if (llvm::is_contained(C->getMotionModifiers(), 8592 OMPC_MOTION_MODIFIER_present)) 8593 Kind = Present; 8594 const auto *EI = C->getVarRefs().begin(); 8595 for (const auto L : C->component_lists()) { 8596 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8597 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8598 C->isImplicit(), std::get<2>(L), *EI); 8599 ++EI; 8600 } 8601 } 8602 for (const auto *Cl : Clauses) { 8603 const auto *C = dyn_cast<OMPFromClause>(Cl); 8604 if (!C) 8605 continue; 8606 MapKind Kind = Other; 8607 if (llvm::is_contained(C->getMotionModifiers(), 8608 OMPC_MOTION_MODIFIER_present)) 8609 Kind = Present; 8610 const auto *EI = C->getVarRefs().begin(); 8611 for (const auto L : C->component_lists()) { 8612 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8613 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8614 C->isImplicit(), std::get<2>(L), *EI); 8615 ++EI; 8616 } 8617 } 8618 8619 // Look at the use_device_ptr clause information and mark the existing map 8620 // entries as such. If there is no map information for an entry in the 8621 // use_device_ptr list, we create one with map type 'alloc' and zero size 8622 // section. It is the user fault if that was not mapped before. If there is 8623 // no map information and the pointer is a struct member, then we defer the 8624 // emission of that entry until the whole struct has been processed. 
8625 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8626 SmallVector<DeferredDevicePtrEntryTy, 4>> 8627 DeferredInfo; 8628 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8629 8630 for (const auto *Cl : Clauses) { 8631 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8632 if (!C) 8633 continue; 8634 for (const auto L : C->component_lists()) { 8635 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8636 std::get<1>(L); 8637 assert(!Components.empty() && 8638 "Not expecting empty list of components!"); 8639 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8640 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8641 const Expr *IE = Components.back().getAssociatedExpression(); 8642 // If the first component is a member expression, we have to look into 8643 // 'this', which maps to null in the map of map information. Otherwise 8644 // look directly for the information. 8645 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8646 8647 // We potentially have map information for this declaration already. 8648 // Look for the first set of components that refer to it. 8649 if (It != Info.end()) { 8650 bool Found = false; 8651 for (auto &Data : It->second) { 8652 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8653 return MI.Components.back().getAssociatedDeclaration() == VD; 8654 }); 8655 // If we found a map entry, signal that the pointer has to be 8656 // returned and move on to the next declaration. Exclude cases where 8657 // the base pointer is mapped as array subscript, array section or 8658 // array shaping. The base address is passed as a pointer to base in 8659 // this case and cannot be used as a base for use_device_ptr list 8660 // item. 
8661 if (CI != Data.end()) { 8662 auto PrevCI = std::next(CI->Components.rbegin()); 8663 const auto *VarD = dyn_cast<VarDecl>(VD); 8664 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8665 isa<MemberExpr>(IE) || 8666 !VD->getType().getNonReferenceType()->isPointerType() || 8667 PrevCI == CI->Components.rend() || 8668 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8669 VarD->hasLocalStorage()) { 8670 CI->ReturnDevicePointer = true; 8671 Found = true; 8672 break; 8673 } 8674 } 8675 } 8676 if (Found) 8677 continue; 8678 } 8679 8680 // We didn't find any match in our map information - generate a zero 8681 // size array section - if the pointer is a struct member we defer this 8682 // action until the whole struct has been processed. 8683 if (isa<MemberExpr>(IE)) { 8684 // Insert the pointer into Info to be processed by 8685 // generateInfoForComponentList. Because it is a member pointer 8686 // without a pointee, no entry will be generated for it, therefore 8687 // we need to generate one after the whole struct has been processed. 8688 // Nonetheless, generateInfoForComponentList must be called to take 8689 // the pointer into account for the calculation of the range of the 8690 // partial struct. 
8691 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8692 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8693 nullptr); 8694 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8695 } else { 8696 llvm::Value *Ptr = 8697 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8698 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8699 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8700 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8701 UseDevicePtrCombinedInfo.Sizes.push_back( 8702 llvm::Constant::getNullValue(CGF.Int64Ty)); 8703 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8704 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8705 } 8706 } 8707 } 8708 8709 // Look at the use_device_addr clause information and mark the existing map 8710 // entries as such. If there is no map information for an entry in the 8711 // use_device_addr list, we create one with map type 'alloc' and zero size 8712 // section. It is the user fault if that was not mapped before. If there is 8713 // no map information and the pointer is a struct member, then we defer the 8714 // emission of that entry until the whole struct has been processed. 8715 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8716 for (const auto *Cl : Clauses) { 8717 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8718 if (!C) 8719 continue; 8720 for (const auto L : C->component_lists()) { 8721 assert(!std::get<1>(L).empty() && 8722 "Not expecting empty list of components!"); 8723 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8724 if (!Processed.insert(VD).second) 8725 continue; 8726 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8727 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8728 // If the first component is a member expression, we have to look into 8729 // 'this', which maps to null in the map of map information. 
Otherwise 8730 // look directly for the information. 8731 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8732 8733 // We potentially have map information for this declaration already. 8734 // Look for the first set of components that refer to it. 8735 if (It != Info.end()) { 8736 bool Found = false; 8737 for (auto &Data : It->second) { 8738 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8739 return MI.Components.back().getAssociatedDeclaration() == VD; 8740 }); 8741 // If we found a map entry, signal that the pointer has to be 8742 // returned and move on to the next declaration. 8743 if (CI != Data.end()) { 8744 CI->ReturnDevicePointer = true; 8745 Found = true; 8746 break; 8747 } 8748 } 8749 if (Found) 8750 continue; 8751 } 8752 8753 // We didn't find any match in our map information - generate a zero 8754 // size array section - if the pointer is a struct member we defer this 8755 // action until the whole struct has been processed. 8756 if (isa<MemberExpr>(IE)) { 8757 // Insert the pointer into Info to be processed by 8758 // generateInfoForComponentList. Because it is a member pointer 8759 // without a pointee, no entry will be generated for it, therefore 8760 // we need to generate one after the whole struct has been processed. 8761 // Nonetheless, generateInfoForComponentList must be called to take 8762 // the pointer into account for the calculation of the range of the 8763 // partial struct. 
8764 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8765 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8766 nullptr, nullptr, /*ForDeviceAddr=*/true); 8767 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8768 } else { 8769 llvm::Value *Ptr; 8770 if (IE->isGLValue()) 8771 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8772 else 8773 Ptr = CGF.EmitScalarExpr(IE); 8774 CombinedInfo.Exprs.push_back(VD); 8775 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8776 CombinedInfo.Pointers.push_back(Ptr); 8777 CombinedInfo.Sizes.push_back( 8778 llvm::Constant::getNullValue(CGF.Int64Ty)); 8779 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8780 CombinedInfo.Mappers.push_back(nullptr); 8781 } 8782 } 8783 } 8784 8785 for (const auto &Data : Info) { 8786 StructRangeInfoTy PartialStruct; 8787 // Temporary generated information. 8788 MapCombinedInfoTy CurInfo; 8789 const Decl *D = Data.first; 8790 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8791 for (const auto &M : Data.second) { 8792 for (const MapInfo &L : M) { 8793 assert(!L.Components.empty() && 8794 "Not expecting declaration with no component lists."); 8795 8796 // Remember the current base pointer index. 8797 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8798 CurInfo.NonContigInfo.IsNonContiguous = 8799 L.Components.back().isNonContiguous(); 8800 generateInfoForComponentList( 8801 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8802 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8803 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8804 8805 // If this entry relates with a device pointer, set the relevant 8806 // declaration and add the 'return pointer' flag. 
8807 if (L.ReturnDevicePointer) { 8808 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8809 "Unexpected number of mapped base pointers."); 8810 8811 const ValueDecl *RelevantVD = 8812 L.Components.back().getAssociatedDeclaration(); 8813 assert(RelevantVD && 8814 "No relevant declaration related with device pointer??"); 8815 8816 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8817 RelevantVD); 8818 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8819 } 8820 } 8821 } 8822 8823 // Append any pending zero-length pointers which are struct members and 8824 // used with use_device_ptr or use_device_addr. 8825 auto CI = DeferredInfo.find(Data.first); 8826 if (CI != DeferredInfo.end()) { 8827 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8828 llvm::Value *BasePtr; 8829 llvm::Value *Ptr; 8830 if (L.ForDeviceAddr) { 8831 if (L.IE->isGLValue()) 8832 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8833 else 8834 Ptr = this->CGF.EmitScalarExpr(L.IE); 8835 BasePtr = Ptr; 8836 // Entry is RETURN_PARAM. Also, set the placeholder value 8837 // MEMBER_OF=FFFF so that the entry is later updated with the 8838 // correct value of MEMBER_OF. 8839 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8840 } else { 8841 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8842 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8843 L.IE->getExprLoc()); 8844 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8845 // placeholder value MEMBER_OF=FFFF so that the entry is later 8846 // updated with the correct value of MEMBER_OF. 
8847 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8848 OMP_MAP_MEMBER_OF); 8849 } 8850 CurInfo.Exprs.push_back(L.VD); 8851 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8852 CurInfo.Pointers.push_back(Ptr); 8853 CurInfo.Sizes.push_back( 8854 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8855 CurInfo.Mappers.push_back(nullptr); 8856 } 8857 } 8858 // If there is an entry in PartialStruct it means we have a struct with 8859 // individual members mapped. Emit an extra combined entry. 8860 if (PartialStruct.Base.isValid()) { 8861 CurInfo.NonContigInfo.Dims.push_back(0); 8862 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8863 } 8864 8865 // We need to append the results of this capture to what we already 8866 // have. 8867 CombinedInfo.append(CurInfo); 8868 } 8869 // Append data for use_device_ptr clauses. 8870 CombinedInfo.append(UseDevicePtrCombinedInfo); 8871 } 8872 8873 public: 8874 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8875 : CurDir(&Dir), CGF(CGF) { 8876 // Extract firstprivate clause information. 8877 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8878 for (const auto *D : C->varlists()) 8879 FirstPrivateDecls.try_emplace( 8880 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8881 // Extract implicit firstprivates from uses_allocators clauses. 
8882 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8883 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8884 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8885 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8886 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8887 /*Implicit=*/true); 8888 else if (const auto *VD = dyn_cast<VarDecl>( 8889 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8890 ->getDecl())) 8891 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8892 } 8893 } 8894 // Extract device pointer clause information. 8895 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8896 for (auto L : C->component_lists()) 8897 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8898 // Extract map information. 8899 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8900 if (C->getMapType() != OMPC_MAP_to) 8901 continue; 8902 for (auto L : C->component_lists()) { 8903 const ValueDecl *VD = std::get<0>(L); 8904 const auto *RD = VD ? VD->getType() 8905 .getCanonicalType() 8906 .getNonReferenceType() 8907 ->getAsCXXRecordDecl() 8908 : nullptr; 8909 if (RD && RD->isLambda()) 8910 LambdasMap.try_emplace(std::get<0>(L), C); 8911 } 8912 } 8913 } 8914 8915 /// Constructor for the declare mapper directive. 8916 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8917 : CurDir(&Dir), CGF(CGF) {} 8918 8919 /// Generate code for the combined entry if we have a partially mapped struct 8920 /// and take care of the mapping flags of the arguments corresponding to 8921 /// individual struct members. 
8922 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8923 MapFlagsArrayTy &CurTypes, 8924 const StructRangeInfoTy &PartialStruct, 8925 const ValueDecl *VD = nullptr, 8926 bool NotTargetParams = true) const { 8927 if (CurTypes.size() == 1 && 8928 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8929 !PartialStruct.IsArraySection) 8930 return; 8931 Address LBAddr = PartialStruct.LowestElem.second; 8932 Address HBAddr = PartialStruct.HighestElem.second; 8933 if (PartialStruct.HasCompleteRecord) { 8934 LBAddr = PartialStruct.LB; 8935 HBAddr = PartialStruct.LB; 8936 } 8937 CombinedInfo.Exprs.push_back(VD); 8938 // Base is the base of the struct 8939 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8940 // Pointer is the address of the lowest element 8941 llvm::Value *LB = LBAddr.getPointer(); 8942 CombinedInfo.Pointers.push_back(LB); 8943 // There should not be a mapper for a combined entry. 8944 CombinedInfo.Mappers.push_back(nullptr); 8945 // Size is (addr of {highest+1} element) - (addr of lowest element) 8946 llvm::Value *HB = HBAddr.getPointer(); 8947 llvm::Value *HAddr = 8948 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1); 8949 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8950 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8951 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); 8952 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8953 /*isSigned=*/false); 8954 CombinedInfo.Sizes.push_back(Size); 8955 // Map type is always TARGET_PARAM, if generate info for captures. 8956 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8957 : OMP_MAP_TARGET_PARAM); 8958 // If any element has the present modifier, then make sure the runtime 8959 // doesn't attempt to allocate the struct. 
8960 if (CurTypes.end() != 8961 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8962 return Type & OMP_MAP_PRESENT; 8963 })) 8964 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8965 // Remove TARGET_PARAM flag from the first element 8966 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8967 // If any element has the ompx_hold modifier, then make sure the runtime 8968 // uses the hold reference count for the struct as a whole so that it won't 8969 // be unmapped by an extra dynamic reference count decrement. Add it to all 8970 // elements as well so the runtime knows which reference count to check 8971 // when determining whether it's time for device-to-host transfers of 8972 // individual elements. 8973 if (CurTypes.end() != 8974 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8975 return Type & OMP_MAP_OMPX_HOLD; 8976 })) { 8977 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD; 8978 for (auto &M : CurTypes) 8979 M |= OMP_MAP_OMPX_HOLD; 8980 } 8981 8982 // All other current entries will be MEMBER_OF the combined entry 8983 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8984 // 0xFFFF in the MEMBER_OF field). 8985 OpenMPOffloadMappingFlags MemberOfFlag = 8986 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8987 for (auto &M : CurTypes) 8988 setCorrectMemberOfFlag(M, MemberOfFlag); 8989 } 8990 8991 /// Generate all the base pointers, section pointers, sizes, map types, and 8992 /// mappers for the extracted mappable expressions (all included in \a 8993 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8994 /// pair of the relevant declaration and index where it occurs is appended to 8995 /// the device pointers info array. 
8996 void generateAllInfo( 8997 MapCombinedInfoTy &CombinedInfo, 8998 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8999 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9000 assert(CurDir.is<const OMPExecutableDirective *>() && 9001 "Expect a executable directive"); 9002 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9003 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9004 } 9005 9006 /// Generate all the base pointers, section pointers, sizes, map types, and 9007 /// mappers for the extracted map clauses of user-defined mapper (all included 9008 /// in \a CombinedInfo). 9009 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9010 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9011 "Expect a declare mapper directive"); 9012 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9013 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9014 } 9015 9016 /// Emit capture info for lambdas for variables captured by reference. 
///
/// Does nothing unless the canonical, non-reference type of \p VD is a lambda
/// class. Otherwise one entry is appended to \p CombinedInfo for the captured
/// 'this' (if any) and one for every variable that is captured by reference
/// or captured by copy with pointer type. For each such entry the address of
/// the capture field is recorded in \p LambdaPointers, mapped to the address
/// of the lambda object built from \p Arg.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
  const auto *RD = VDType->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                 CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Captured 'this': base pointer is the field's address, pointer comes
    // from the field lvalue, and the size is that of a void pointer.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // This exact flag combination is what adjustMemberOfForLambdaCaptures
    // looks for when it patches the MEMBER_OF index.
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD (the captured variable) shadows the function parameter.
    const VarDecl *VD = LC.getCapturedVar();
    // Captures that are neither by-reference nor of pointer type are skipped.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced
      // (non-reference, canonical) type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // By-copy capture of a pointer: the pointer is the value loaded from
      // the field and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambda captures.
///
/// For every entry whose flags are exactly
/// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT, looks up the lambda address
/// stored in \p LambdaPointers for that entry's base pointer, finds the
/// closest preceding entry in \p Pointers holding that address, and rewrites
/// the entry's MEMBER_OF field to refer to that index.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Scan backwards over entries I-1 .. 0 for the parent lambda's pointer.
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
///
/// Nothing is generated for a capture recorded in LambdasMap. A capture
/// listed in DevPointersMap produces a single pass-by-value entry. Otherwise
/// the component lists of all map clauses that mention the captured
/// declaration are collected, ordered, checked for overlaps and handed to
/// generateInfoForComponentList, which also updates \p PartialStruct.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we are generating information for the first
  // component. A captured 'this' is represented by a null declaration.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // For map(to: lambda): skip here, it is processed in
  // generateDefaultMapInfo.
  if (LambdasMap.count(VD))
    return;

  // If this declaration appears in an is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // Tuple layout: <components, map type, map modifiers, is-implicit, mapper,
  // variable reference expression>.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    // EI advances in lock step with the component lists returned for VD.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The expression is not correct if the mapping is implicit.
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // NOTE(review): HasPresent/HasPresentR are taken from LHS/RHS respectively,
  // but HasAllocs is computed from RHS's map type and HasAllocsR from LHS's.
  // As written, for an LHS with both 'present' and 'alloc' and an RHS with
  // neither, the comparator returns true in both argument orders, so it is
  // not a strict weak ordering for such inputs. Confirm the intended order
  // before relying on or changing it.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // Keys point into DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L against every list that follows it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied variables are simply
      // overwritten with L1's values.
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists from the back while the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        // It is the first unmatched component of the longer list.
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        // Same, if one component is a base and another component is a
        // dereferenced pointer memberexpr with the same base.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            (std::prev(It)->getAssociatedDeclaration() &&
             std::prev(It)
                 ->getAssociatedDeclaration()
                 ->getType()
                 ->isPointerType()) ||
            (It->getAssociatedDeclaration() &&
             It->getAssociatedDeclaration()->getType()->isPointerType() &&
             std::next(It) != CE && std::next(It) != SE))
          continue;
        // The exhausted (shorter) list is the base; the longer one is
        // recorded as overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Peel pointee / array element types until a fixed point is reached.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // A list with fewer elements is less than a list with more
          // elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // Both lists diverge at a field: fields of the same parent are
          // ordered by field index, otherwise by whichever appears first in
          // Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto *It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared after every list, including the ones skipped above.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one entry is appended to \a CombinedInfo. It is always flagged
/// TARGET_PARAM, and additionally IMPLICIT unless the captured variable is
/// found in FirstPrivateDecls with its stored flag set to false.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    // The size is that of the type the captured pointer field points to.
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
      // A firstprivate reference to a pointer: the pointer entry is the
      // address obtained by loading through the reference, not CV itself.
      Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
          CV, ElementType, CGF.getContext().getDeclAlign(VD),
          AlignmentSource::Decl));
      CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
    } else {
      CombinedInfo.Pointers.push_back(CV);
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit the dimension descriptors for the non-contiguous entries of
/// \p CombinedInfo.
///
/// For every index I whose NonContigInfo.Dims[I] is not 1, a temporary array
/// named "dims" with Dims[I] descriptor_dim records is created, filled from
/// the Offsets/Counts/Strides lists in reverse order, and its address is
/// stored into slot I of \p Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in the order added above.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Element II of the descriptor array is filled from the mirrored
      // position in the Offsets/Counts/Strides lists.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that actually emitted a descriptor.
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9492 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9493 if (!E) 9494 return nullptr; 9495 9496 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9497 if (const MemberExpr *ME = 9498 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9499 return ME->getMemberDecl(); 9500 return nullptr; 9501 } 9502 9503 /// Emit a string constant containing the names of the values mapped to the 9504 /// offloading runtime library. 9505 llvm::Constant * 9506 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9507 MappableExprsHandler::MappingExprInfo &MapExprs) { 9508 9509 uint32_t SrcLocStrSize; 9510 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9511 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9512 9513 SourceLocation Loc; 9514 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9515 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9516 Loc = VD->getLocation(); 9517 else 9518 Loc = MapExprs.getMapExpr()->getExprLoc(); 9519 } else { 9520 Loc = MapExprs.getMapDecl()->getLocation(); 9521 } 9522 9523 std::string ExprName; 9524 if (MapExprs.getMapExpr()) { 9525 PrintingPolicy P(CGF.getContext().getLangOpts()); 9526 llvm::raw_string_ostream OS(ExprName); 9527 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9528 OS.flush(); 9529 } else { 9530 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9531 } 9532 9533 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9534 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9535 PLoc.getLine(), PLoc.getColumn(), 9536 SrcLocStrSize); 9537 } 9538 9539 /// Emit the arrays used to pass the captures and map information to the 9540 /// offloading runtime library. If there is no map or capture information, 9541 /// return nullptr by reference. 
/// \param CombinedInfo base pointers, pointers, sizes, map types, mappers and
///        expressions of every entry to pass to the runtime.
/// \param Info receives the emitted arrays; its previous array information is
///        cleared first.
/// \param IsNonContiguous when true, entries flagged OMP_MAP_NON_CONTIG store
///        their dimension count in the sizes array, and the non-contiguous
///        descriptors are emitted at the end (if any offsets were recorded).
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Create the temporaries holding one void* slot per entry for the base
    // pointers, the pointers and the mappers.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time size of each entry (0 by default);
    // RuntimeSizes marks the entries whose size must be stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // Constant expressions and global values are still treated as
        // runtime sizes.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For a non-contiguous entry the size slot carries the number of
          // dimensions instead of a byte size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is only known at run time: a plain temporary is enough, all
      // of its slots are stored in the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is constant: emit a private constant global holding
      // the constant sizes.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a temporary so that the
        // runtime-sized slots can be overwritten in the per-entry loop below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: the global can be passed directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map name strings are only built if debug information is requested;
    // otherwise a null pointer is recorded.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; Info.MapTypesArray was already created
      // from the unmodified values above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store the base pointer, pointer, runtime size (if any) and mapper of
    // every entry into its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot address so the value is stored with its own type.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address of entries that carry a device pointer
      // declaration.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only sizes flagged as runtime sizes need a store; the others are
      // already part of the constant initializer.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array. Entries without a user-defined mapper store
      // a null pointer.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The non-contiguous descriptors are only needed when requested, when
  // offsets were recorded, and when there is at least one entry.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Select the map types meant for the end of the region instead of the
  /// beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Intentionally implicit, so a plain bool converts to ArgumentsOptions.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9743 static void emitOffloadingArraysArgument( 9744 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9745 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9746 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9747 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9748 const ArgumentsOptions &Options = ArgumentsOptions()) { 9749 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9750 "expected region end call to runtime only when end call is separate"); 9751 CodeGenModule &CGM = CGF.CGM; 9752 if (Info.NumberOfPtrs) { 9753 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9754 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9755 Info.BasePointersArray, 9756 /*Idx0=*/0, /*Idx1=*/0); 9757 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9758 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9759 Info.PointersArray, 9760 /*Idx0=*/0, 9761 /*Idx1=*/0); 9762 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9763 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9764 /*Idx0=*/0, /*Idx1=*/0); 9765 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9766 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9767 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9768 : Info.MapTypesArray, 9769 /*Idx0=*/0, 9770 /*Idx1=*/0); 9771 9772 // Only emit the mapper information arrays if debug information is 9773 // requested. 
9774 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9775 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9776 else 9777 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9778 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9779 Info.MapNamesArray, 9780 /*Idx0=*/0, 9781 /*Idx1=*/0); 9782 // If there is no user-defined mapper, set the mapper array to nullptr to 9783 // avoid an unnecessary data privatization 9784 if (!Info.HasMapper) 9785 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9786 else 9787 MappersArrayArg = 9788 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9789 } else { 9790 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9791 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9792 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9793 MapTypesArrayArg = 9794 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9795 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9796 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9797 } 9798 } 9799 9800 /// Check for inner distribute directive. 
9801 static const OMPExecutableDirective * 9802 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9803 const auto *CS = D.getInnermostCapturedStmt(); 9804 const auto *Body = 9805 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9806 const Stmt *ChildStmt = 9807 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9808 9809 if (const auto *NestedDir = 9810 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9811 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9812 switch (D.getDirectiveKind()) { 9813 case OMPD_target: 9814 if (isOpenMPDistributeDirective(DKind)) 9815 return NestedDir; 9816 if (DKind == OMPD_teams) { 9817 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9818 /*IgnoreCaptured=*/true); 9819 if (!Body) 9820 return nullptr; 9821 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9822 if (const auto *NND = 9823 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9824 DKind = NND->getDirectiveKind(); 9825 if (isOpenMPDistributeDirective(DKind)) 9826 return NND; 9827 } 9828 } 9829 return nullptr; 9830 case OMPD_target_teams: 9831 if (isOpenMPDistributeDirective(DKind)) 9832 return NestedDir; 9833 return nullptr; 9834 case OMPD_target_parallel: 9835 case OMPD_target_simd: 9836 case OMPD_target_parallel_for: 9837 case OMPD_target_parallel_for_simd: 9838 return nullptr; 9839 case OMPD_target_teams_distribute: 9840 case OMPD_target_teams_distribute_simd: 9841 case OMPD_target_teams_distribute_parallel_for: 9842 case OMPD_target_teams_distribute_parallel_for_simd: 9843 case OMPD_parallel: 9844 case OMPD_for: 9845 case OMPD_parallel_for: 9846 case OMPD_parallel_master: 9847 case OMPD_parallel_sections: 9848 case OMPD_for_simd: 9849 case OMPD_parallel_for_simd: 9850 case OMPD_cancel: 9851 case OMPD_cancellation_point: 9852 case OMPD_ordered: 9853 case OMPD_threadprivate: 9854 case OMPD_allocate: 9855 case OMPD_task: 9856 case OMPD_simd: 9857 case OMPD_tile: 9858 
case OMPD_unroll: 9859 case OMPD_sections: 9860 case OMPD_section: 9861 case OMPD_single: 9862 case OMPD_master: 9863 case OMPD_critical: 9864 case OMPD_taskyield: 9865 case OMPD_barrier: 9866 case OMPD_taskwait: 9867 case OMPD_taskgroup: 9868 case OMPD_atomic: 9869 case OMPD_flush: 9870 case OMPD_depobj: 9871 case OMPD_scan: 9872 case OMPD_teams: 9873 case OMPD_target_data: 9874 case OMPD_target_exit_data: 9875 case OMPD_target_enter_data: 9876 case OMPD_distribute: 9877 case OMPD_distribute_simd: 9878 case OMPD_distribute_parallel_for: 9879 case OMPD_distribute_parallel_for_simd: 9880 case OMPD_teams_distribute: 9881 case OMPD_teams_distribute_simd: 9882 case OMPD_teams_distribute_parallel_for: 9883 case OMPD_teams_distribute_parallel_for_simd: 9884 case OMPD_target_update: 9885 case OMPD_declare_simd: 9886 case OMPD_declare_variant: 9887 case OMPD_begin_declare_variant: 9888 case OMPD_end_declare_variant: 9889 case OMPD_declare_target: 9890 case OMPD_end_declare_target: 9891 case OMPD_declare_reduction: 9892 case OMPD_declare_mapper: 9893 case OMPD_taskloop: 9894 case OMPD_taskloop_simd: 9895 case OMPD_master_taskloop: 9896 case OMPD_master_taskloop_simd: 9897 case OMPD_parallel_master_taskloop: 9898 case OMPD_parallel_master_taskloop_simd: 9899 case OMPD_requires: 9900 case OMPD_metadirective: 9901 case OMPD_unknown: 9902 default: 9903 llvm_unreachable("Unexpected directive."); 9904 } 9905 } 9906 9907 return nullptr; 9908 } 9909 9910 /// Emit the user-defined mapper function. The code generation follows the 9911 /// pattern in the example below. 9912 /// \code 9913 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9914 /// void *base, void *begin, 9915 /// int64_t size, int64_t type, 9916 /// void *name = nullptr) { 9917 /// // Allocate space for an array section first or add a base/begin for 9918 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes. The argument order is
  // handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required. Control continues at HeadBB when the
  // initialization is skipped.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block holding the branch; it is the first predecessor of the
  // loop body for the pointer PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the back edge comes from; it stays BodyBB when no
  // component is emitted by the loop below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count by MappableExprsHandler::getFlagMemberOffset() so that it
  // can be added to the map type of every component below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name is only emitted when debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Add the shifted number of pre-existing components to the original map
    // type (presumably this rebases its MEMBER_OF field; the shift amount
    // comes from getFlagMemberOffset()).
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // NOTE(review): no explicit branch from FromBB to EndBB is created here;
    // presumably EmitBlock adds the fall-through branch - confirm.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Select the map type computed on whichever path reached EndBB; the
    // ToElseBB edge is the tofrom case and keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function, and associate the declaration with the
  // function that triggered the emission when one was provided.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10196 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 10197 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 10198 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 10199 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, 10200 bool IsInit) { 10201 StringRef Prefix = IsInit ? ".init" : ".del"; 10202 10203 // Evaluate if this is an array section. 10204 llvm::BasicBlock *BodyBB = 10205 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 10206 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 10207 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 10208 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 10209 MapType, 10210 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 10211 llvm::Value *DeleteCond; 10212 llvm::Value *Cond; 10213 if (IsInit) { 10214 // base != begin? 10215 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); 10216 // IsPtrAndObj? 10217 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 10218 MapType, 10219 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 10220 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 10221 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 10222 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 10223 DeleteCond = MapperCGF.Builder.CreateIsNull( 10224 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10225 } else { 10226 Cond = IsArray; 10227 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 10228 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 10229 } 10230 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 10231 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 10232 10233 MapperCGF.EmitBlock(BodyBB); 10234 // Get the array size by multiplying element size and element number (i.e., \p 10235 // Size). 
10236 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 10237 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 10238 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 10239 // memory allocation/deletion purpose only. 10240 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 10241 MapType, 10242 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 10243 MappableExprsHandler::OMP_MAP_FROM))); 10244 MapTypeArg = MapperCGF.Builder.CreateOr( 10245 MapTypeArg, 10246 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT)); 10247 10248 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 10249 // data structure. 10250 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 10251 ArraySize, MapTypeArg, MapName}; 10252 MapperCGF.EmitRuntimeCall( 10253 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10254 OMPRTL___tgt_push_mapper_component), 10255 OffloadingArgs); 10256 } 10257 10258 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 10259 const OMPDeclareMapperDecl *D) { 10260 auto I = UDMMap.find(D); 10261 if (I != UDMMap.end()) 10262 return I->second; 10263 emitUserDefinedMapper(D); 10264 return UDMMap.lookup(D); 10265 } 10266 10267 void CGOpenMPRuntime::emitTargetNumIterationsCall( 10268 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10269 llvm::Value *DeviceID, 10270 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10271 const OMPLoopDirective &D)> 10272 SizeEmitter) { 10273 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10274 const OMPExecutableDirective *TD = &D; 10275 // Get nested teams distribute kind directive, if any. 
10276 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10277 TD = getNestedDistributeDirective(CGM.getContext(), D); 10278 if (!TD) 10279 return; 10280 const auto *LD = cast<OMPLoopDirective>(TD); 10281 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10282 PrePostActionTy &) { 10283 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10284 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10285 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10286 CGF.EmitRuntimeCall( 10287 OMPBuilder.getOrCreateRuntimeFunction( 10288 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10289 Args); 10290 } 10291 }; 10292 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10293 } 10294 10295 void CGOpenMPRuntime::emitTargetCall( 10296 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10297 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10298 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10299 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10300 const OMPLoopDirective &D)> 10301 SizeEmitter) { 10302 if (!CGF.HaveInsertPoint()) 10303 return; 10304 10305 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && 10306 CGM.getLangOpts().OpenMPOffloadMandatory; 10307 10308 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); 10309 10310 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10311 D.hasClausesOfKind<OMPNowaitClause>(); 10312 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10313 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10314 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10315 PrePostActionTy &) { 10316 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10317 }; 10318 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10319 10320 CodeGenFunction::OMPTargetDataInfo InputInfo; 10321 llvm::Value *MapTypesArray = nullptr; 10322 
llvm::Value *MapNamesArray = nullptr; 10323 // Generate code for the host fallback function. 10324 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, 10325 &CS, OffloadingMandatory](CodeGenFunction &CGF) { 10326 if (OffloadingMandatory) { 10327 CGF.Builder.CreateUnreachable(); 10328 } else { 10329 if (RequiresOuterTask) { 10330 CapturedVars.clear(); 10331 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10332 } 10333 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10334 } 10335 }; 10336 // Fill up the pointer arrays and transfer execution to the device. 10337 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, 10338 &MapNamesArray, SizeEmitter, 10339 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10340 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10341 // Reverse offloading is not supported, so just execute on the host. 10342 FallbackGen(CGF); 10343 return; 10344 } 10345 10346 // On top of the arrays that were filled up, the target offloading call 10347 // takes as arguments the device id as well as the host pointer. The host 10348 // pointer is used by the runtime library to identify the current target 10349 // region, so it only has to be unique and not necessarily point to 10350 // anything. It could be the pointer to the outlined function that 10351 // implements the target region, but we aren't using that so that the 10352 // compiler doesn't need to keep that, and could therefore inline the host 10353 // function if proven worthwhile during optimization. 10354 10355 // From this point on, we need to have an ID of the target region defined. 10356 assert(OutlinedFnID && "Invalid outlined function ID!"); 10357 (void)OutlinedFnID; 10358 10359 // Emit device ID if any. 
10360 llvm::Value *DeviceID; 10361 if (Device.getPointer()) { 10362 assert((Device.getInt() == OMPC_DEVICE_unknown || 10363 Device.getInt() == OMPC_DEVICE_device_num) && 10364 "Expected device_num modifier."); 10365 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10366 DeviceID = 10367 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10368 } else { 10369 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10370 } 10371 10372 // Emit the number of elements in the offloading arrays. 10373 llvm::Value *PointerNum = 10374 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10375 10376 // Return value of the runtime offloading call. 10377 llvm::Value *Return; 10378 10379 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10380 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10381 10382 // Source location for the ident struct 10383 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10384 10385 // Emit tripcount for the target loop-based directive. 10386 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10387 10388 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10389 // The target region is an outlined function launched by the runtime 10390 // via calls __tgt_target() or __tgt_target_teams(). 10391 // 10392 // __tgt_target() launches a target region with one team and one thread, 10393 // executing a serial region. This master thread may in turn launch 10394 // more threads within its team upon encountering a parallel region, 10395 // however, no additional teams can be launched on the device. 10396 // 10397 // __tgt_target_teams() launches a target region with one or more teams, 10398 // each with one or more threads. This call is required for target 10399 // constructs such as: 10400 // 'target teams' 10401 // 'target' / 'teams' 10402 // 'target teams distribute parallel for' 10403 // 'target parallel' 10404 // and so on. 
10405 // 10406 // Note that on the host and CPU targets, the runtime implementation of 10407 // these calls simply call the outlined function without forking threads. 10408 // The outlined functions themselves have runtime calls to 10409 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10410 // the compiler in emitTeamsCall() and emitParallelCall(). 10411 // 10412 // In contrast, on the NVPTX target, the implementation of 10413 // __tgt_target_teams() launches a GPU kernel with the requested number 10414 // of teams and threads so no additional calls to the runtime are required. 10415 if (NumTeams) { 10416 // If we have NumTeams defined this means that we have an enclosed teams 10417 // region. Therefore we also expect to have NumThreads defined. These two 10418 // values should be defined in the presence of a teams directive, 10419 // regardless of having any clauses associated. If the user is using teams 10420 // but no clauses, these two values will be the default that should be 10421 // passed to the runtime library - a 32-bit integer with the value zero. 10422 assert(NumThreads && "Thread limit expression should be available along " 10423 "with number of teams."); 10424 SmallVector<llvm::Value *> OffloadingArgs = { 10425 RTLoc, 10426 DeviceID, 10427 OutlinedFnID, 10428 PointerNum, 10429 InputInfo.BasePointersArray.getPointer(), 10430 InputInfo.PointersArray.getPointer(), 10431 InputInfo.SizesArray.getPointer(), 10432 MapTypesArray, 10433 MapNamesArray, 10434 InputInfo.MappersArray.getPointer(), 10435 NumTeams, 10436 NumThreads}; 10437 if (HasNowait) { 10438 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10439 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 
10440 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10441 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10442 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10443 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10444 } 10445 Return = CGF.EmitRuntimeCall( 10446 OMPBuilder.getOrCreateRuntimeFunction( 10447 CGM.getModule(), HasNowait 10448 ? OMPRTL___tgt_target_teams_nowait_mapper 10449 : OMPRTL___tgt_target_teams_mapper), 10450 OffloadingArgs); 10451 } else { 10452 SmallVector<llvm::Value *> OffloadingArgs = { 10453 RTLoc, 10454 DeviceID, 10455 OutlinedFnID, 10456 PointerNum, 10457 InputInfo.BasePointersArray.getPointer(), 10458 InputInfo.PointersArray.getPointer(), 10459 InputInfo.SizesArray.getPointer(), 10460 MapTypesArray, 10461 MapNamesArray, 10462 InputInfo.MappersArray.getPointer()}; 10463 if (HasNowait) { 10464 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10465 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10466 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10467 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10468 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10469 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10470 } 10471 Return = CGF.EmitRuntimeCall( 10472 OMPBuilder.getOrCreateRuntimeFunction( 10473 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10474 : OMPRTL___tgt_target_mapper), 10475 OffloadingArgs); 10476 } 10477 10478 // Check the error code and execute the host version if required. 
10479 llvm::BasicBlock *OffloadFailedBlock = 10480 CGF.createBasicBlock("omp_offload.failed"); 10481 llvm::BasicBlock *OffloadContBlock = 10482 CGF.createBasicBlock("omp_offload.cont"); 10483 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10484 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10485 10486 CGF.EmitBlock(OffloadFailedBlock); 10487 FallbackGen(CGF); 10488 10489 CGF.EmitBranch(OffloadContBlock); 10490 10491 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10492 }; 10493 10494 // Notify that the host version must be executed. 10495 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { 10496 FallbackGen(CGF); 10497 }; 10498 10499 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10500 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10501 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10502 // Fill up the arrays with all the captured variables. 10503 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10504 10505 // Get mappable expression information. 10506 MappableExprsHandler MEHandler(D, CGF); 10507 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10508 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10509 10510 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10511 auto *CV = CapturedVars.begin(); 10512 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10513 CE = CS.capture_end(); 10514 CI != CE; ++CI, ++RI, ++CV) { 10515 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10516 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10517 10518 // VLA sizes are passed to the outlined region by copy and do not have map 10519 // information associated. 
10520 if (CI->capturesVariableArrayType()) { 10521 CurInfo.Exprs.push_back(nullptr); 10522 CurInfo.BasePointers.push_back(*CV); 10523 CurInfo.Pointers.push_back(*CV); 10524 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10525 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10526 // Copy to the device as an argument. No need to retrieve it. 10527 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10528 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10529 MappableExprsHandler::OMP_MAP_IMPLICIT); 10530 CurInfo.Mappers.push_back(nullptr); 10531 } else { 10532 // If we have any information in the map clause, we use it, otherwise we 10533 // just do a default mapping. 10534 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10535 if (!CI->capturesThis()) 10536 MappedVarSet.insert(CI->getCapturedVar()); 10537 else 10538 MappedVarSet.insert(nullptr); 10539 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10540 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10541 // Generate correct mapping for variables captured by reference in 10542 // lambdas. 10543 if (CI->capturesVariable()) 10544 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10545 CurInfo, LambdaPointers); 10546 } 10547 // We expect to have at least an element of information for this capture. 10548 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10549 "Non-existing map pointer for capture!"); 10550 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10551 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10552 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10553 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10554 "Inconsistent map information sizes!"); 10555 10556 // If there is an entry in PartialStruct it means we have a struct with 10557 // individual members mapped. Emit an extra combined entry. 
10558 if (PartialStruct.Base.isValid()) { 10559 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10560 MEHandler.emitCombinedEntry( 10561 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10562 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10563 } 10564 10565 // We need to append the results of this capture to what we already have. 10566 CombinedInfo.append(CurInfo); 10567 } 10568 // Adjust MEMBER_OF flags for the lambdas captures. 10569 MEHandler.adjustMemberOfForLambdaCaptures( 10570 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10571 CombinedInfo.Types); 10572 // Map any list items in a map clause that were not captures because they 10573 // weren't referenced within the construct. 10574 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10575 10576 TargetDataInfo Info; 10577 // Fill up the arrays and create the arguments. 10578 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10579 emitOffloadingArraysArgument( 10580 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10581 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10582 {/*ForEndCall=*/false}); 10583 10584 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10585 InputInfo.BasePointersArray = 10586 Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 10587 InputInfo.PointersArray = 10588 Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 10589 InputInfo.SizesArray = 10590 Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); 10591 InputInfo.MappersArray = 10592 Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 10593 MapTypesArray = Info.MapTypesArray; 10594 MapNamesArray = Info.MapNamesArray; 10595 if (RequiresOuterTask) 10596 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10597 else 10598 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10599 }; 10600 10601 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10602 CodeGenFunction 
&CGF, PrePostActionTy &) { 10603 if (RequiresOuterTask) { 10604 CodeGenFunction::OMPTargetDataInfo InputInfo; 10605 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10606 } else { 10607 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10608 } 10609 }; 10610 10611 // If we have a target function ID it means that we need to support 10612 // offloading, otherwise, just execute on the host. We need to execute on host 10613 // regardless of the conditional in the if clause if, e.g., the user do not 10614 // specify target triples. 10615 if (OutlinedFnID) { 10616 if (IfCond) { 10617 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10618 } else { 10619 RegionCodeGenTy ThenRCG(TargetThenGen); 10620 ThenRCG(CGF); 10621 } 10622 } else { 10623 RegionCodeGenTy ElseRCG(TargetElseGen); 10624 ElseRCG(CGF); 10625 } 10626 } 10627 10628 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10629 StringRef ParentName) { 10630 if (!S) 10631 return; 10632 10633 // Codegen OMP target directives that offload compute to the device. 10634 bool RequiresDeviceCodegen = 10635 isa<OMPExecutableDirective>(S) && 10636 isOpenMPTargetExecutionDirective( 10637 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10638 10639 if (RequiresDeviceCodegen) { 10640 const auto &E = *cast<OMPExecutableDirective>(S); 10641 unsigned DeviceID; 10642 unsigned FileID; 10643 unsigned Line; 10644 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10645 FileID, Line); 10646 10647 // Is this a target region that should not be emitted as an entry point? If 10648 // so just signal we are done with this target region. 
10649 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10650 ParentName, Line)) 10651 return; 10652 10653 switch (E.getDirectiveKind()) { 10654 case OMPD_target: 10655 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10656 cast<OMPTargetDirective>(E)); 10657 break; 10658 case OMPD_target_parallel: 10659 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10660 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10661 break; 10662 case OMPD_target_teams: 10663 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10664 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10665 break; 10666 case OMPD_target_teams_distribute: 10667 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10668 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10669 break; 10670 case OMPD_target_teams_distribute_simd: 10671 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10672 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10673 break; 10674 case OMPD_target_parallel_for: 10675 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10676 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10677 break; 10678 case OMPD_target_parallel_for_simd: 10679 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10680 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10681 break; 10682 case OMPD_target_simd: 10683 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10684 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10685 break; 10686 case OMPD_target_teams_distribute_parallel_for: 10687 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10688 CGM, ParentName, 10689 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10690 break; 10691 case OMPD_target_teams_distribute_parallel_for_simd: 10692 CodeGenFunction:: 10693 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10694 CGM, ParentName, 10695 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10696 break; 10697 case OMPD_parallel: 10698 case OMPD_for: 10699 case OMPD_parallel_for: 10700 case OMPD_parallel_master: 10701 case OMPD_parallel_sections: 10702 case OMPD_for_simd: 10703 case OMPD_parallel_for_simd: 10704 case OMPD_cancel: 10705 case OMPD_cancellation_point: 10706 case OMPD_ordered: 10707 case OMPD_threadprivate: 10708 case OMPD_allocate: 10709 case OMPD_task: 10710 case OMPD_simd: 10711 case OMPD_tile: 10712 case OMPD_unroll: 10713 case OMPD_sections: 10714 case OMPD_section: 10715 case OMPD_single: 10716 case OMPD_master: 10717 case OMPD_critical: 10718 case OMPD_taskyield: 10719 case OMPD_barrier: 10720 case OMPD_taskwait: 10721 case OMPD_taskgroup: 10722 case OMPD_atomic: 10723 case OMPD_flush: 10724 case OMPD_depobj: 10725 case OMPD_scan: 10726 case OMPD_teams: 10727 case OMPD_target_data: 10728 case OMPD_target_exit_data: 10729 case OMPD_target_enter_data: 10730 case OMPD_distribute: 10731 case OMPD_distribute_simd: 10732 case OMPD_distribute_parallel_for: 10733 case OMPD_distribute_parallel_for_simd: 10734 case OMPD_teams_distribute: 10735 case OMPD_teams_distribute_simd: 10736 case OMPD_teams_distribute_parallel_for: 10737 case OMPD_teams_distribute_parallel_for_simd: 10738 case OMPD_target_update: 10739 case OMPD_declare_simd: 10740 case OMPD_declare_variant: 10741 case OMPD_begin_declare_variant: 10742 case OMPD_end_declare_variant: 10743 case OMPD_declare_target: 10744 case OMPD_end_declare_target: 10745 case OMPD_declare_reduction: 10746 case OMPD_declare_mapper: 10747 case OMPD_taskloop: 10748 case OMPD_taskloop_simd: 10749 case OMPD_master_taskloop: 10750 case OMPD_master_taskloop_simd: 10751 case OMPD_parallel_master_taskloop: 10752 case OMPD_parallel_master_taskloop_simd: 10753 case OMPD_requires: 10754 case OMPD_metadirective: 10755 case OMPD_unknown: 10756 default: 10757 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10758 } 10759 return; 
10760 } 10761 10762 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10763 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10764 return; 10765 10766 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10767 return; 10768 } 10769 10770 // If this is a lambda function, look into its body. 10771 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10772 S = L->getBody(); 10773 10774 // Keep looking for target regions recursively. 10775 for (const Stmt *II : S->children()) 10776 scanForTargetRegionsFunctions(II, ParentName); 10777 } 10778 10779 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { 10780 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10781 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10782 if (!DevTy) 10783 return false; 10784 // Do not emit device_type(nohost) functions for the host. 10785 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10786 return true; 10787 // Do not emit device_type(host) functions for the device. 10788 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10789 return true; 10790 return false; 10791 } 10792 10793 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10794 // If emitting code for the host, we do not process FD here. Instead we do 10795 // the normal code generation. 10796 if (!CGM.getLangOpts().OpenMPIsDevice) { 10797 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) 10798 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10799 CGM.getLangOpts().OpenMPIsDevice)) 10800 return true; 10801 return false; 10802 } 10803 10804 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10805 // Try to detect target regions in the function. 
10806 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10807 StringRef Name = CGM.getMangledName(GD); 10808 scanForTargetRegionsFunctions(FD->getBody(), Name); 10809 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), 10810 CGM.getLangOpts().OpenMPIsDevice)) 10811 return true; 10812 } 10813 10814 // Do not to emit function if it is not marked as declare target. 10815 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10816 AlreadyEmittedTargetDecls.count(VD) == 0; 10817 } 10818 10819 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10820 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), 10821 CGM.getLangOpts().OpenMPIsDevice)) 10822 return true; 10823 10824 if (!CGM.getLangOpts().OpenMPIsDevice) 10825 return false; 10826 10827 // Check if there are Ctors/Dtors in this declaration and look for target 10828 // regions in it. We use the complete variant to produce the kernel name 10829 // mangling. 10830 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10831 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10832 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10833 StringRef ParentName = 10834 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10835 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10836 } 10837 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10838 StringRef ParentName = 10839 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10840 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10841 } 10842 } 10843 10844 // Do not to emit variable if it is not marked as declare target. 
10845 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10846 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10847 cast<VarDecl>(GD.getDecl())); 10848 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10849 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10850 HasRequiresUnifiedSharedMemory)) { 10851 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10852 return true; 10853 } 10854 return false; 10855 } 10856 10857 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10858 llvm::Constant *Addr) { 10859 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10860 !CGM.getLangOpts().OpenMPIsDevice) 10861 return; 10862 10863 // If we have host/nohost variables, they do not need to be registered. 10864 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10865 OMPDeclareTargetDeclAttr::getDeviceType(VD); 10866 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any) 10867 return; 10868 10869 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10870 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10871 if (!Res) { 10872 if (CGM.getLangOpts().OpenMPIsDevice) { 10873 // Register non-target variables being emitted in device code (debug info 10874 // may cause this). 10875 StringRef VarName = CGM.getMangledName(VD); 10876 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10877 } 10878 return; 10879 } 10880 // Register declare target variables. 
10881 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10882 StringRef VarName; 10883 CharUnits VarSize; 10884 llvm::GlobalValue::LinkageTypes Linkage; 10885 10886 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10887 !HasRequiresUnifiedSharedMemory) { 10888 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10889 VarName = CGM.getMangledName(VD); 10890 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10891 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10892 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10893 } else { 10894 VarSize = CharUnits::Zero(); 10895 } 10896 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10897 // Temp solution to prevent optimizations of the internal variables. 10898 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10899 // Do not create a "ref-variable" if the original is not also available 10900 // on the host. 10901 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) 10902 return; 10903 std::string RefName = getName({VarName, "ref"}); 10904 if (!CGM.GetGlobalValue(RefName)) { 10905 llvm::Constant *AddrRef = 10906 getOrCreateInternalVariable(Addr->getType(), RefName); 10907 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10908 GVAddrRef->setConstant(/*Val=*/true); 10909 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10910 GVAddrRef->setInitializer(Addr); 10911 CGM.addCompilerUsedGlobal(GVAddrRef); 10912 } 10913 } 10914 } else { 10915 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10916 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10917 HasRequiresUnifiedSharedMemory)) && 10918 "Declare target attribute must link or to with unified memory."); 10919 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10920 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10921 else 10922 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10923 10924 if 
(CGM.getLangOpts().OpenMPIsDevice) { 10925 VarName = Addr->getName(); 10926 Addr = nullptr; 10927 } else { 10928 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10929 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10930 } 10931 VarSize = CGM.getPointerSize(); 10932 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10933 } 10934 10935 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10936 VarName, Addr, VarSize, Flags, Linkage); 10937 } 10938 10939 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10940 if (isa<FunctionDecl>(GD.getDecl()) || 10941 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10942 return emitTargetFunctions(GD); 10943 10944 return emitTargetGlobalVariable(GD); 10945 } 10946 10947 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10948 for (const VarDecl *VD : DeferredGlobalVariables) { 10949 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10950 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10951 if (!Res) 10952 continue; 10953 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10954 !HasRequiresUnifiedSharedMemory) { 10955 CGM.EmitGlobal(VD); 10956 } else { 10957 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10958 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10959 HasRequiresUnifiedSharedMemory)) && 10960 "Expected link clause or to clause with unified memory."); 10961 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10962 } 10963 } 10964 } 10965 10966 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10967 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10968 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10969 " Expected target-based directive."); 10970 } 10971 10972 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10973 for (const OMPClause *Clause : D->clauselists()) { 10974 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10975 HasRequiresUnifiedSharedMemory = true; 10976 } else if 
(const auto *AC = 10977 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10978 switch (AC->getAtomicDefaultMemOrderKind()) { 10979 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10980 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10981 break; 10982 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10983 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10984 break; 10985 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10986 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10987 break; 10988 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10989 break; 10990 } 10991 } 10992 } 10993 } 10994 10995 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10996 return RequiresAtomicOrdering; 10997 } 10998 10999 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 11000 LangAS &AS) { 11001 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 11002 return false; 11003 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 11004 switch(A->getAllocatorType()) { 11005 case OMPAllocateDeclAttr::OMPNullMemAlloc: 11006 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 11007 // Not supported, fallback to the default mem space. 
// Allocators in this case group are placed in the default address space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// Returns the value of the HasRequiresUnifiedSharedMemory member.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// When compiling for the device, remembers the runtime's current
/// ShouldMarkAsGlobal value and clears the flag. Does nothing otherwise.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// When compiling for the device, restores the ShouldMarkAsGlobal value that
/// the constructor saved.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Records \p GD in AlreadyEmittedTargetDecls unless it is a declare target
/// declaration.
///
/// \return true when not compiling for the device or when ShouldMarkAsGlobal
/// is cleared. For a declare target function with a body that has not been
/// recorded yet, returns whether an llvm::Function with the mangled name
/// exists and is a definition. For other declare target functions returns
/// true. Otherwise returns true only if the declaration had already been
/// recorded.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert().second is true for a first-time insertion, hence the negation.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Creates the function that passes the 'requires' flags to the offloading
/// runtime through __tgt_register_requires.
///
/// \return the created function, or nullptr when there are no target triples,
/// in simd-only mode, when compiling for the device, or when nothing
/// target-related has been emitted.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This keeps the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emits a call to __kmpc_fork_teams for \p OutlinedFn, forwarding
/// \p CapturedVars as the trailing arguments. No-op without an insert point.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emits a call to __kmpc_push_num_teams. \p NumTeams and \p ThreadLimit are
/// evaluated and cast to signed 32-bit values; a null expression is passed
/// to the runtime as the constant 0. No-op without an insert point.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call
  // __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emits the opening __tgt_target_data_begin_mapper call, the region body
/// produced by \p CodeGen, and the closing __tgt_target_data_end_mapper call.
///
/// With \p IfCond the runtime calls are emitted under the condition. When
/// Info.CaptureDeviceAddrMap is non-empty the body is emitted in both the
/// 'then' and the 'else' branch of the opening step; otherwise it is emitted
/// once, between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emits the runtime call for a standalone 'target enter data', 'target exit
/// data' or 'target update' directive.
///
/// The offloading arrays are built first. The call is then emitted inside a
/// target task when the directive has a 'depend' or 'nowait' clause, and as
/// an inlined directive otherwise. With \p IfCond nothing is emitted on the
/// false branch.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen and read by ThenGen, which runs afterwards.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays to the variables captured by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; parameters default to Vector.
  ParamKindTy Kind = Vector;
  /// Constant stride for Linear parameters, or the position of the parameter
  /// holding the stride for LinearWithVarStride ones.
  llvm::APSInt StrideOrArg;
  /// Alignment taken from the 'aligned' clause; zero when not set.
  llvm::APSInt Alignment;
};
} // namespace

/// Determines the characteristic data type (CDT) of \p FD as described in the
/// comment below and returns C.getTypeSize() of it, or 0 when the return type
/// is null.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  // NOTE(review): the isNull() test here repeats the early return above.
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    // For C++ methods slot 0 of ParamAttrs is consulted first and the
    // explicit parameters follow it.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Adds one "_ZGV<isa><mask><vlen><params>_<name>" function attribute to
/// \p Fn for every combination of mask ('N' and/or 'M', chosen from
/// \p State) and ISA in the local table.
///
/// When \p VLENVal is zero the vector length is the ISA's register size
/// divided by the value returned from evaluateCDTSize().
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          // A stride of 1 is implied and left out of the name.
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11641 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11642 QT = QT.getCanonicalType(); 11643 11644 if (QT->isVoidType()) 11645 return false; 11646 11647 if (Kind == ParamKindTy::Uniform) 11648 return false; 11649 11650 if (Kind == ParamKindTy::Linear) 11651 return false; 11652 11653 // TODO: Handle linear references with modifiers 11654 11655 if (Kind == ParamKindTy::LinearWithVarStride) 11656 return false; 11657 11658 return true; 11659 } 11660 11661 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11662 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11663 QT = QT.getCanonicalType(); 11664 unsigned Size = C.getTypeSize(QT); 11665 11666 // Only scalars and complex within 16 bytes wide set PVB to true. 11667 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11668 return false; 11669 11670 if (QT->isFloatingType()) 11671 return true; 11672 11673 if (QT->isIntegerType()) 11674 return true; 11675 11676 if (QT->isPointerType()) 11677 return true; 11678 11679 // TODO: Add support for complex types (section 3.1.2, item 2). 11680 11681 return false; 11682 } 11683 11684 /// Computes the lane size (LS) of a return type or of an input parameter, 11685 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11686 /// TODO: Add support for references, section 3.2.1, item 1. 11687 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11688 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11689 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11690 if (getAArch64PBV(PTy, C)) 11691 return C.getTypeSize(PTy); 11692 } 11693 if (getAArch64PBV(QT, C)) 11694 return C.getTypeSize(QT); 11695 11696 return C.getTypeSize(C.getUIntPtrType()); 11697 } 11698 11699 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11700 // signature of the scalar function, as defined in 3.2.2 of the 11701 // AAVFABI. 
11702 static std::tuple<unsigned, unsigned, bool> 11703 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11704 QualType RetType = FD->getReturnType().getCanonicalType(); 11705 11706 ASTContext &C = FD->getASTContext(); 11707 11708 bool OutputBecomesInput = false; 11709 11710 llvm::SmallVector<unsigned, 8> Sizes; 11711 if (!RetType->isVoidType()) { 11712 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11713 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11714 OutputBecomesInput = true; 11715 } 11716 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11717 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11718 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11719 } 11720 11721 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11722 // The LS of a function parameter / return value can only be a power 11723 // of 2, starting from 8 bits, up to 128. 11724 assert(llvm::all_of(Sizes, 11725 [](unsigned Size) { 11726 return Size == 8 || Size == 16 || Size == 32 || 11727 Size == 64 || Size == 128; 11728 }) && 11729 "Invalid size"); 11730 11731 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11732 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11733 OutputBecomesInput); 11734 } 11735 11736 /// Mangle the parameter part of the vector function name according to 11737 /// their OpenMP classification. The mangling function is defined in 11738 /// section 3.5 of the AAVFABI. 11739 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11740 SmallString<256> Buffer; 11741 llvm::raw_svector_ostream Out(Buffer); 11742 for (const auto &ParamAttr : ParamAttrs) { 11743 switch (ParamAttr.Kind) { 11744 case LinearWithVarStride: 11745 Out << "ls" << ParamAttr.StrideOrArg; 11746 break; 11747 case Linear: 11748 Out << 'l'; 11749 // Don't print the step value if it is not present or if it is 11750 // equal to 1. 
11751 if (ParamAttr.StrideOrArg != 1) 11752 Out << ParamAttr.StrideOrArg; 11753 break; 11754 case Uniform: 11755 Out << 'u'; 11756 break; 11757 case Vector: 11758 Out << 'v'; 11759 break; 11760 } 11761 11762 if (!!ParamAttr.Alignment) 11763 Out << 'a' << ParamAttr.Alignment; 11764 } 11765 11766 return std::string(Out.str()); 11767 } 11768 11769 // Function used to add the attribute. The parameter `VLEN` is 11770 // templated to allow the use of "x" when targeting scalable functions 11771 // for SVE. 11772 template <typename T> 11773 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11774 char ISA, StringRef ParSeq, 11775 StringRef MangledName, bool OutputBecomesInput, 11776 llvm::Function *Fn) { 11777 SmallString<256> Buffer; 11778 llvm::raw_svector_ostream Out(Buffer); 11779 Out << Prefix << ISA << LMask << VLEN; 11780 if (OutputBecomesInput) 11781 Out << "v"; 11782 Out << ParSeq << "_" << MangledName; 11783 Fn->addFnAttr(Out.str()); 11784 } 11785 11786 // Helper function to generate the Advanced SIMD names depending on 11787 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // Vector lengths emitted per NDS: 8 -> 8 and 16, 16 -> 4 and 8,
  // 32 -> 2 and 4, 64 and 128 -> 2 only.
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \p UserVLEN is the user-provided simdlen, 0 when absent. \p ISA is 's'
/// for SVE or 'n' for Advanced SIMD. Invalid simdlen values produce a
/// warning at \p SLoc and no attributes.
/// NOTE(review): \p VecRegSize is not read anywhere in this function.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Walks every OMPDeclareSimdDeclAttr on \p FD and its previous
/// declarations, classifies each parameter (uniform, aligned, linear) and
/// adds the vector-variant name attributes to \p Fn for x86 and aarch64
/// targets. Other targets get no attributes.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For C++ methods position 0 is keyed by the function declaration itself;
  // it is the slot looked up for CXXThisExpr operands below.
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // Every parameter starts out with the default kind (Vector).
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment when one is given, otherwise the default
        // simd alignment for the parameter type, converted from bits.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      // NOTE(review): MI is advanced in step with SI but never dereferenced
      // in this function.
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // The step is not a constant: when it names a parameter, record
            // that parameter's position as a variable stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Names are emitted separately for each of the two features present.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
12058 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12059 public: 12060 static const int DoacrossFinArgs = 2; 12061 12062 private: 12063 llvm::FunctionCallee RTLFn; 12064 llvm::Value *Args[DoacrossFinArgs]; 12065 12066 public: 12067 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12068 ArrayRef<llvm::Value *> CallArgs) 12069 : RTLFn(RTLFn) { 12070 assert(CallArgs.size() == DoacrossFinArgs); 12071 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12072 } 12073 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12074 if (!CGF.HaveInsertPoint()) 12075 return; 12076 CGF.EmitRuntimeCall(RTLFn, Args); 12077 } 12078 }; 12079 } // namespace 12080 12081 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12082 const OMPLoopDirective &D, 12083 ArrayRef<Expr *> NumIterations) { 12084 if (!CGF.HaveInsertPoint()) 12085 return; 12086 12087 ASTContext &C = CGM.getContext(); 12088 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12089 RecordDecl *RD; 12090 if (KmpDimTy.isNull()) { 12091 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12092 // kmp_int64 lo; // lower 12093 // kmp_int64 up; // upper 12094 // kmp_int64 st; // stride 12095 // }; 12096 RD = C.buildImplicitRecord("kmp_dim"); 12097 RD->startDefinition(); 12098 addFieldToRecordDecl(C, RD, Int64Ty); 12099 addFieldToRecordDecl(C, RD, Int64Ty); 12100 addFieldToRecordDecl(C, RD, Int64Ty); 12101 RD->completeDefinition(); 12102 KmpDimTy = C.getRecordType(RD); 12103 } else { 12104 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12105 } 12106 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12107 QualType ArrayTy = 12108 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12109 12110 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12111 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12112 enum { LowerFD = 0, UpperFD, StrideFD }; 12113 // Fill dims with data. 
12114 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12115 LValue DimsLVal = CGF.MakeAddrLValue( 12116 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12117 // dims.upper = num_iterations; 12118 LValue UpperLVal = CGF.EmitLValueForField( 12119 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12120 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12121 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12122 Int64Ty, NumIterations[I]->getExprLoc()); 12123 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12124 // dims.stride = 1; 12125 LValue StrideLVal = CGF.EmitLValueForField( 12126 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12127 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12128 StrideLVal); 12129 } 12130 12131 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12132 // kmp_int32 num_dims, struct kmp_dim * dims); 12133 llvm::Value *Args[] = { 12134 emitUpdateLocation(CGF, D.getBeginLoc()), 12135 getThreadID(CGF, D.getBeginLoc()), 12136 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12137 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12138 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12139 CGM.VoidPtrTy)}; 12140 12141 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12142 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12143 CGF.EmitRuntimeCall(RTLFn, Args); 12144 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12145 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12146 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12147 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12148 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12149 llvm::makeArrayRef(FiniArgs)); 12150 } 12151 12152 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12153 const OMPDependClause *C) { 12154 QualType Int64Ty = 12155 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12156 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12157 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12158 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12159 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12160 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12161 const Expr *CounterVal = C->getLoopData(I); 12162 assert(CounterVal); 12163 llvm::Value *CntVal = CGF.EmitScalarConversion( 12164 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12165 CounterVal->getExprLoc()); 12166 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12167 /*Volatile=*/false, Int64Ty); 12168 } 12169 llvm::Value *Args[] = { 12170 emitUpdateLocation(CGF, C->getBeginLoc()), 12171 getThreadID(CGF, C->getBeginLoc()), 12172 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12173 llvm::FunctionCallee RTLFn; 12174 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12175 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12176 OMPRTL___kmpc_doacross_post); 12177 } else { 12178 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12179 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12180 OMPRTL___kmpc_doacross_wait); 12181 } 12182 CGF.EmitRuntimeCall(RTLFn, Args); 12183 } 12184 12185 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12186 llvm::FunctionCallee Callee, 12187 ArrayRef<llvm::Value *> Args) const { 12188 assert(Loc.isValid() && "Outlined function call location must be valid."); 12189 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12190 12191 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12192 if (Fn->doesNotThrow()) { 12193 CGF.EmitNounwindRuntimeCall(Fn, Args); 12194 return; 12195 } 12196 } 12197 CGF.EmitRuntimeCall(Callee, Args); 12198 } 12199 12200 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12201 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 12202 ArrayRef<llvm::Value *> Args) const { 12203 emitCall(CGF, Loc, OutlinedFn, Args); 12204 } 12205 12206 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12207 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12208 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12209 HasEmittedDeclareTargetRegion = true; 12210 } 12211 12212 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12213 const VarDecl *NativeParam, 12214 const VarDecl *TargetParam) const { 12215 return CGF.GetAddrOfLocalVar(NativeParam); 12216 } 12217 12218 /// Return allocator value from expression, or return a null allocator (default 12219 /// when no allocator specified). 12220 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, 12221 const Expr *Allocator) { 12222 llvm::Value *AllocVal; 12223 if (Allocator) { 12224 AllocVal = CGF.EmitScalarExpr(Allocator); 12225 // According to the standard, the original allocator type is a enum 12226 // (integer). Convert to pointer type, if required. 12227 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12228 CGF.getContext().VoidPtrTy, 12229 Allocator->getExprLoc()); 12230 } else { 12231 // If no allocator specified, it defaults to the null allocator. 
12232 AllocVal = llvm::Constant::getNullValue( 12233 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); 12234 } 12235 return AllocVal; 12236 } 12237 12238 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12239 const VarDecl *VD) { 12240 if (!VD) 12241 return Address::invalid(); 12242 Address UntiedAddr = Address::invalid(); 12243 Address UntiedRealAddr = Address::invalid(); 12244 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12245 if (It != FunctionToUntiedTaskStackMap.end()) { 12246 const UntiedLocalVarsAddressesMap &UntiedData = 12247 UntiedLocalVarsStack[It->second]; 12248 auto I = UntiedData.find(VD); 12249 if (I != UntiedData.end()) { 12250 UntiedAddr = I->second.first; 12251 UntiedRealAddr = I->second.second; 12252 } 12253 } 12254 const VarDecl *CVD = VD->getCanonicalDecl(); 12255 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12256 // Use the default allocation. 12257 if (!isAllocatableDecl(VD)) 12258 return UntiedAddr; 12259 llvm::Value *Size; 12260 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12261 if (CVD->getType()->isVariablyModifiedType()) { 12262 Size = CGF.getTypeSize(CVD->getType()); 12263 // Align the size: ((size + align - 1) / align) * align 12264 Size = CGF.Builder.CreateNUWAdd( 12265 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12266 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12267 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12268 } else { 12269 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12270 Size = CGM.getSize(Sz.alignTo(Align)); 12271 } 12272 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12273 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12274 const Expr *Allocator = AA->getAllocator(); 12275 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 12276 llvm::Value *Alignment = 12277 AA->getAlignment() 12278 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()), 12279 CGM.SizeTy, /*isSigned=*/false) 12280 : nullptr; 12281 SmallVector<llvm::Value *, 4> Args; 12282 Args.push_back(ThreadID); 12283 if (Alignment) 12284 Args.push_back(Alignment); 12285 Args.push_back(Size); 12286 Args.push_back(AllocVal); 12287 llvm::omp::RuntimeFunction FnID = 12288 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 12289 llvm::Value *Addr = CGF.EmitRuntimeCall( 12290 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 12291 getName({CVD->getName(), ".void.addr"})); 12292 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12293 CGM.getModule(), OMPRTL___kmpc_free); 12294 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12295 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12296 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12297 if (UntiedAddr.isValid()) 12298 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12299 12300 // Cleanup action for allocate support. 
12301 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12302 llvm::FunctionCallee RTLFn; 12303 SourceLocation::UIntTy LocEncoding; 12304 Address Addr; 12305 const Expr *AllocExpr; 12306 12307 public: 12308 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 12309 SourceLocation::UIntTy LocEncoding, Address Addr, 12310 const Expr *AllocExpr) 12311 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12312 AllocExpr(AllocExpr) {} 12313 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12314 if (!CGF.HaveInsertPoint()) 12315 return; 12316 llvm::Value *Args[3]; 12317 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12318 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12319 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12320 Addr.getPointer(), CGF.VoidPtrTy); 12321 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 12322 Args[2] = AllocVal; 12323 CGF.EmitRuntimeCall(RTLFn, Args); 12324 } 12325 }; 12326 Address VDAddr = 12327 UntiedRealAddr.isValid() 12328 ? 
UntiedRealAddr 12329 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); 12330 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12331 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12332 VDAddr, Allocator); 12333 if (UntiedRealAddr.isValid()) 12334 if (auto *Region = 12335 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12336 Region->emitUntiedSwitch(CGF); 12337 return VDAddr; 12338 } 12339 return UntiedAddr; 12340 } 12341 12342 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12343 const VarDecl *VD) const { 12344 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12345 if (It == FunctionToUntiedTaskStackMap.end()) 12346 return false; 12347 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12348 } 12349 12350 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12351 CodeGenModule &CGM, const OMPLoopDirective &S) 12352 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12353 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12354 if (!NeedToPush) 12355 return; 12356 NontemporalDeclsSet &DS = 12357 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12358 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12359 for (const Stmt *Ref : C->private_refs()) { 12360 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12361 const ValueDecl *VD; 12362 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12363 VD = DRE->getDecl(); 12364 } else { 12365 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12366 assert((ME->isImplicitCXXThis() || 12367 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12368 "Expected member of current class."); 12369 VD = ME->getMemberDecl(); 12370 } 12371 DS.insert(VD); 12372 } 12373 } 12374 } 12375 12376 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12377 if (!NeedToPush) 12378 return; 12379 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12380 } 12381 
/// Register \p LocalVars as the untied-task local variable data for the
/// function currently being emitted by \p CGF.
///
/// Nothing is registered when \p LocalVars is empty. Otherwise the current
/// function is mapped to the index the new entry will occupy in
/// UntiedLocalVarsStack (try_emplace leaves an existing mapping for the
/// function untouched) and a copy of \p LocalVars is pushed onto that stack.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

/// Pop the entry pushed by the constructor, if one was pushed.
/// NOTE(review): the FunctionToUntiedTaskStackMap entry is not removed here;
/// presumably it is erased elsewhere - confirm.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Return true if \p VD is contained in any set currently on
/// NontemporalDeclsStack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

/// Collect the declarations for which lastprivate conditional analysis must
/// be disabled inside directive \p S.
///
/// Candidates are the variables captured by the first capture region of a
/// target or tasking directive, plus scalar variables named by DeclRefExprs
/// in private, firstprivate, lastprivate, reduction and linear clauses. A
/// candidate is added to \p NeedToAddForLPCsAsDisabled when the innermost
/// LastprivateConditionalStack entry that knows the declaration is not
/// already a disabled entry.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  // The five loops below apply the same filter to each clause kind: only
  // scalar-typed references that are DeclRefExprs are collected.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // For each candidate, inspect only the innermost stack entry that mentions
  // it (the search stops at the first match from the top of the stack).
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Push a LastprivateConditionalStack entry for directive \p S when OpenMP
/// is at least 5.0 (LangOpts value 50) and \p S has a lastprivate clause of
/// the conditional kind.
///
/// The entry maps each variable of those clauses to a unique name generated
/// with the "pl_cond" prefix and records \p IVLVal and the current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Push a "disabled" LastprivateConditionalStack entry for directive \p S
/// when tryToDisableInnerAnalysis() reports at least one declaration.
///
/// The entry maps each reported declaration to an empty name and is flagged
/// Disabled. Nothing is pushed for OpenMP versions below 5.0.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Named factory for the two-argument (disabling) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

/// Pop the stack entry pushed by whichever constructor ran, if any. The
/// assertions check that the top entry's Disabled flag matches the recorded
/// action.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Create (or reuse) the private storage for lastprivate conditional
/// variable \p VD in the current function and return the address of its
/// value field.
///
/// The storage is an implicit record with two fields: the variable's
/// non-reference type followed by a char-typed "fired" flag. The record type,
/// both fields and the base lvalue are cached per function and per variable
/// in LastprivateConditionalToTypes. On every call the flag is reset to zero.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First request for this variable in this function: build the record,
    // allocate a temporary for it and remember everything for later calls.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// Visit() returns true when a reference to a tracked, non-disabled
/// declaration is found; the details of the match are then available through
/// getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack entries to search, innermost last.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  /// Not read or written by the members of this class.
  SourceLocation Loc;

public:
  /// Searches the entries from innermost to outermost for the referenced
  /// declaration. The first entry that knows the declaration decides: a
  /// disabled entry yields false, otherwise the match is recorded.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same search as VisitDeclRefExpr, keyed on the member declaration. Only
  /// member accesses whose base is a wrapped 'this' are considered.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic traversal: visits non-null children, skipping expression
  /// children that are not glvalues, and stops at the first match.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expression, canonical declaration, unique name, IV lvalue,
  /// function) recorded by the last successful visit.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emit the conditional update of the global copies that track the last
/// value of a lastprivate conditional variable.
///
/// Two internal variables are obtained through getOrCreateInternalVariable():
/// one named from \p UniqueDeclName with an "iv" component, holding the last
/// iteration value, and one named \p UniqueDeclName, holding the last value
/// of the variable. If the stored iteration value is <= the current value
/// loaded from \p IVLVal, both are overwritten from \p IVLVal and \p LVal.
///
/// The update is wrapped in a critical region named \p UniqueDeclName, except
/// in OpenMPSimd mode where it is emitted inline. Aggregate-typed variables
/// are not supported.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison signedness follows the iteration variable's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded at the
    // end of this statement - confirm it still has the intended effect on the
    // EmitBlock call that follows.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS references a tracked lastprivate conditional variable, emit the
/// bookkeeping for an update of that variable.
///
/// When the variable was registered by a different function than the one
/// being emitted, only the "fired" flag of its storage record is set to 1
/// with an unordered, volatile atomic store. Otherwise the full conditional
/// update is emitted through emitLastprivateConditionalUpdate().
///
/// Does nothing for OpenMP versions below 5.0 or when no entries are on the
/// stack.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the variable's address as the address of the record that
    // was created for it in the registering function.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// After directive \p D, emit a guarded conditional update for each tracked
/// lastprivate conditional variable whose "fired" flag is set.
///
/// Only the innermost non-disabled stack entry is considered, and only when
/// it was registered by the function currently being emitted. Variables not
/// captured by the last capture region of \p D, or listed in
/// \p IgnoredDecls, are skipped.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    // Form an lvalue for the variable, loading through the reference when it
    // has reference type.
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

/// Copy the tracked last value of lastprivate conditional variable \p VD
/// into \p PrivLVal.
///
/// The value is read from the module-level global whose name is the unique
/// name recorded for \p VD in the top stack entry. If no such global exists,
/// nothing is emitted. Does nothing for OpenMP versions below 5.0.
///
/// \p VD must be present in the top entry of LastprivateConditionalStack.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

/// Not supported in SIMD-only mode; reaching this is a programming error.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode; reaching this is a programming error.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode; reaching this is a programming error.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode; reaching this is a programming error.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode; reaching this is a programming error.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
llvm_unreachable("Not supported in SIMD-only mode"); 12906 } 12907 12908 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12909 const RegionCodeGenTy &MasterOpGen, 12910 SourceLocation Loc, 12911 const Expr *Filter) { 12912 llvm_unreachable("Not supported in SIMD-only mode"); 12913 } 12914 12915 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12916 SourceLocation Loc) { 12917 llvm_unreachable("Not supported in SIMD-only mode"); 12918 } 12919 12920 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12921 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12922 SourceLocation Loc) { 12923 llvm_unreachable("Not supported in SIMD-only mode"); 12924 } 12925 12926 void CGOpenMPSIMDRuntime::emitSingleRegion( 12927 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12928 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12929 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12930 ArrayRef<const Expr *> AssignmentOps) { 12931 llvm_unreachable("Not supported in SIMD-only mode"); 12932 } 12933 12934 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12935 const RegionCodeGenTy &OrderedOpGen, 12936 SourceLocation Loc, 12937 bool IsThreads) { 12938 llvm_unreachable("Not supported in SIMD-only mode"); 12939 } 12940 12941 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12942 SourceLocation Loc, 12943 OpenMPDirectiveKind Kind, 12944 bool EmitChecks, 12945 bool ForceSimpleCall) { 12946 llvm_unreachable("Not supported in SIMD-only mode"); 12947 } 12948 12949 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12950 CodeGenFunction &CGF, SourceLocation Loc, 12951 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12952 bool Ordered, const DispatchRTInput &DispatchValues) { 12953 llvm_unreachable("Not supported in SIMD-only mode"); 12954 } 12955 12956 void CGOpenMPSIMDRuntime::emitForStaticInit( 12957 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12958 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12959 llvm_unreachable("Not supported in SIMD-only mode"); 12960 } 12961 12962 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12963 CodeGenFunction &CGF, SourceLocation Loc, 12964 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12965 llvm_unreachable("Not supported in SIMD-only mode"); 12966 } 12967 12968 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12969 SourceLocation Loc, 12970 unsigned IVSize, 12971 bool IVSigned) { 12972 llvm_unreachable("Not supported in SIMD-only mode"); 12973 } 12974 12975 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12976 SourceLocation Loc, 12977 OpenMPDirectiveKind DKind) { 12978 llvm_unreachable("Not supported in SIMD-only mode"); 12979 } 12980 12981 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12982 SourceLocation Loc, 12983 unsigned IVSize, bool IVSigned, 12984 Address IL, Address LB, 12985 Address UB, Address ST) { 12986 llvm_unreachable("Not supported in SIMD-only mode"); 12987 } 12988 12989 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12990 llvm::Value *NumThreads, 12991 SourceLocation Loc) { 12992 llvm_unreachable("Not supported in SIMD-only mode"); 12993 } 12994 12995 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12996 ProcBindKind ProcBind, 12997 SourceLocation Loc) { 12998 llvm_unreachable("Not supported in SIMD-only mode"); 12999 } 13000 13001 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13002 const VarDecl *VD, 13003 Address VDAddr, 13004 SourceLocation Loc) { 13005 llvm_unreachable("Not supported in SIMD-only mode"); 13006 } 13007 13008 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13009 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13010 CodeGenFunction *CGF) { 13011 llvm_unreachable("Not supported in SIMD-only mode"); 
13012 } 13013 13014 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13015 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13016 llvm_unreachable("Not supported in SIMD-only mode"); 13017 } 13018 13019 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13020 ArrayRef<const Expr *> Vars, 13021 SourceLocation Loc, 13022 llvm::AtomicOrdering AO) { 13023 llvm_unreachable("Not supported in SIMD-only mode"); 13024 } 13025 13026 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13027 const OMPExecutableDirective &D, 13028 llvm::Function *TaskFunction, 13029 QualType SharedsTy, Address Shareds, 13030 const Expr *IfCond, 13031 const OMPTaskDataTy &Data) { 13032 llvm_unreachable("Not supported in SIMD-only mode"); 13033 } 13034 13035 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13036 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13037 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13038 const Expr *IfCond, const OMPTaskDataTy &Data) { 13039 llvm_unreachable("Not supported in SIMD-only mode"); 13040 } 13041 13042 void CGOpenMPSIMDRuntime::emitReduction( 13043 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13044 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13045 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13046 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13047 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13048 ReductionOps, Options); 13049 } 13050 13051 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13052 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13053 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13054 llvm_unreachable("Not supported in SIMD-only mode"); 13055 } 13056 13057 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13058 SourceLocation Loc, 13059 bool 
IsWorksharingReduction) { 13060 llvm_unreachable("Not supported in SIMD-only mode"); 13061 } 13062 13063 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13064 SourceLocation Loc, 13065 ReductionCodeGen &RCG, 13066 unsigned N) { 13067 llvm_unreachable("Not supported in SIMD-only mode"); 13068 } 13069 13070 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13071 SourceLocation Loc, 13072 llvm::Value *ReductionsPtr, 13073 LValue SharedLVal) { 13074 llvm_unreachable("Not supported in SIMD-only mode"); 13075 } 13076 13077 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13078 SourceLocation Loc, 13079 const OMPTaskDataTy &Data) { 13080 llvm_unreachable("Not supported in SIMD-only mode"); 13081 } 13082 13083 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13084 CodeGenFunction &CGF, SourceLocation Loc, 13085 OpenMPDirectiveKind CancelRegion) { 13086 llvm_unreachable("Not supported in SIMD-only mode"); 13087 } 13088 13089 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13090 SourceLocation Loc, const Expr *IfCond, 13091 OpenMPDirectiveKind CancelRegion) { 13092 llvm_unreachable("Not supported in SIMD-only mode"); 13093 } 13094 13095 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13096 const OMPExecutableDirective &D, StringRef ParentName, 13097 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13098 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13099 llvm_unreachable("Not supported in SIMD-only mode"); 13100 } 13101 13102 void CGOpenMPSIMDRuntime::emitTargetCall( 13103 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13104 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13105 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13106 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13107 const OMPLoopDirective &D)> 13108 SizeEmitter) { 13109 llvm_unreachable("Not supported in SIMD-only mode"); 13110 } 13111 
/// Target function emission is not supported in SIMD-only mode; this entry
/// point must never be reached.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Target global variable emission is not supported in SIMD-only mode; this
/// entry point must never be reached.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Always returns false; \p GD is not inspected.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

/// Teams calls are not supported in SIMD-only mode; this entry point must
/// never be reached.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// The num_teams / thread_limit clauses are not supported in SIMD-only mode;
/// this entry point must never be reached.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Target data calls are not supported in SIMD-only mode; this entry point
/// must never be reached.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Stand-alone target data calls are not supported in SIMD-only mode; this
/// entry point must never be reached.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Doacross initialization is not supported in SIMD-only mode; this entry
/// point must never be reached.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Doacross ordered emission is not supported in SIMD-only mode; this entry
/// point must never be reached.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Parameter translation is not supported in SIMD-only mode; this entry point
/// must never be reached.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Parameter address lookup is not supported in SIMD-only mode; this entry
/// point must never be reached.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}