//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Subclasses specialize it per region flavor (parallel/task/inlined/target);
/// the \p RegionKind member is the discriminator their classof() checks use.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info tied to a captured statement (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info without a captured statement (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for untied-task switching points; a no-op for all regions except
  /// task-outlined ones, which override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support (isa/cast): any CGCapturedStmtInfo with the
  /// CR_OpenMP kind is (a subclass of) this class.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: an OpenMP region info whose kind is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switch-based resume machinery for untied
  /// tasks: each task "part" becomes a case in a single SwitchInst, numbered
  /// in emission order via getNumCases().
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the part id and dispatch on it.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Case 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Record the next part id (current case count) so that re-entry
        // resumes at the block added below.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI: an OpenMP region info whose kind is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing outlined region (if
/// any); with no outer region there is nothing to look up.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI: an OpenMP region info whose kind is InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same pointer as OldCSI when it is itself an OpenMP region, else null.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: an OpenMP region info whose kind is TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder codegen callback for regions that must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on the CodeGenFunction for the lifetime of the
/// object; optionally (NoInheritance) stashes and clears the lambda/block
/// capture state so the inlined region does not inherit it.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Undo the swaps/saves performed in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// NOTE: these are ABI constants shared with the OpenMP runtime — do not
/// renumber.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
//                             fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). ABI values shared with the runtime.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push as NormalAndEHCleanup so Exit() runs on both the normal and the
    // exceptional path out of the region.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match: CallExpr whose callee is an OpaqueValueExpr wrapping a
  // DeclRefExpr to an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initializer of one reduction item. If \p DRD has an explicit
/// initializer, rebind the UDR's omp_priv/omp_orig placeholders to
/// \p Private / \p Original and evaluate \p InitOp; otherwise zero-initialize
/// \p Private from an emitted null-constant global.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's two placeholder variables onto the private and
    // original storage, then evaluate the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a null constant of the item type
    // and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
680 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 681 QualType Type, bool EmitDeclareReductionInit, 682 const Expr *Init, 683 const OMPDeclareReductionDecl *DRD, 684 Address SrcAddr = Address::invalid()) { 685 // Perform element-by-element initialization. 686 QualType ElementTy; 687 688 // Drill down to the base element type on both arrays. 689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = 701 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 702 // The basic structure here is a while-do loop. 703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 705 llvm::Value *IsEmpty = 706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 708 709 // Enter the loop body, making that address the current address. 
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 711 CGF.EmitBlock(BodyBB); 712 713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 714 715 llvm::PHINode *SrcElementPHI = nullptr; 716 Address SrcElementCurrent = Address::invalid(); 717 if (DRD) { 718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 719 "omp.arraycpy.srcElementPast"); 720 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 721 SrcElementCurrent = 722 Address(SrcElementPHI, 723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 724 } 725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 727 DestElementPHI->addIncoming(DestBegin, EntryBB); 728 Address DestElementCurrent = 729 Address(DestElementPHI, 730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 731 732 // Emit copy. 733 { 734 CodeGenFunction::RunCleanupsScope InitScope(CGF); 735 if (EmitDeclareReductionInit) { 736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 737 SrcElementCurrent, ElementTy); 738 } else 739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 740 /*IsInitializer=*/false); 741 } 742 743 if (DRD) { 744 // Shift the address forward by one element. 745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 746 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 747 "omp.arraycpy.dest.element"); 748 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 749 } 750 751 // Shift the address forward by one element. 752 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 753 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 754 "omp.arraycpy.dest.element"); 755 // Check whether we've reached the end. 
756 llvm::Value *Done = 757 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 758 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 759 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 760 761 // Done. 762 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 763 } 764 765 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 766 return CGF.EmitOMPSharedLValue(E); 767 } 768 769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 770 const Expr *E) { 771 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 772 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 773 return LValue(); 774 } 775 776 void ReductionCodeGen::emitAggregateInitialization( 777 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, 778 const OMPDeclareReductionDecl *DRD) { 779 // Emit VarDecl with copy init for arrays. 780 // Get the address of the original variable captured in current 781 // captured region. 782 const auto *PrivateVD = 783 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 784 bool EmitDeclareReductionInit = 785 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 786 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 787 EmitDeclareReductionInit, 788 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 789 : PrivateVD->getInit(), 790 DRD, SharedAddr); 791 } 792 793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 794 ArrayRef<const Expr *> Origs, 795 ArrayRef<const Expr *> Privates, 796 ArrayRef<const Expr *> ReductionOps) { 797 ClausesData.reserve(Shareds.size()); 798 SharedAddresses.reserve(Shareds.size()); 799 Sizes.reserve(Shareds.size()); 800 BaseDecls.reserve(Shareds.size()); 801 const auto *IOrig = Origs.begin(); 802 const auto *IPriv = Privates.begin(); 803 const auto *IRed = ReductionOps.begin(); 804 for (const Expr *Ref : Shareds) { 805 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 806 std::advance(IOrig, 1); 807 std::advance(IPriv, 1); 808 std::advance(IRed, 1); 809 } 810 } 811 812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 813 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 814 "Number of generated lvalues must be exactly N."); 815 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 816 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 817 SharedAddresses.emplace_back(First, Second); 818 if (ClausesData[N].Shared == ClausesData[N].Ref) { 819 OrigAddresses.emplace_back(First, Second); 820 } else { 821 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 822 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 823 OrigAddresses.emplace_back(First, Second); 824 } 825 } 826 827 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 828 const auto *PrivateVD = 829 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 830 QualType PrivateType = PrivateVD->getType(); 831 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 832 if (!PrivateType->isVariablyModifiedType()) { 833 Sizes.emplace_back( 834 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 835 nullptr); 836 return; 837 } 838 llvm::Value *Size; 839 llvm::Value *SizeInChars; 840 auto 
*ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed from the section's two pointers.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: total byte size is known; derive the element count from it.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Variant of emitAggregateType that takes a previously computed element
/// count \p Size; only variably-modified private types need any work here.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy for reduction item \p N.
/// \param DefaultInit Callback for the default initialization; its boolean
///        result decides whether the fallback initializer emission below is
///        still required.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reductions go through the per-element aggregate path.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined (declare reduction) initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Returns true if the private copy of reduction item \p N has a non-trivial
/// destruction kind and therefore requires cleanup emission.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Strip pointer/reference indirections off \p BaseLV until its type matches
/// \p ElTy, loading through each level, and return an lvalue of \p ElTy's
/// memory type at the resulting address.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while
((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the indirection chain between \p BaseTy and \p ElTy around the
/// adjusted pointer \p Addr: one memory temporary is created per level of
/// pointer/reference indirection, each storing the next level's address, and
/// the outermost temporary is returned. With no indirection, \p Addr itself
/// (cast to \p BaseLVType) is returned at \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Walk to the base DeclRefExpr of an array section or array subscript
/// expression \p Ref; returns its VarDecl (and sets \p DE) or nullptr when
/// \p Ref is neither kind of expression.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // Array section/subscript: offset the private pointer by the same
    // distance the section start has from the base variable.
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Returns true if reduction item \p N is initialized via a user-defined
/// declare-reduction initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Load the thread id through the pointer-typed thread-id variable of the
/// outlined region. The resulting lvalue always has type int32.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// For task regions the thread-id variable holds the value directly (it is
/// not a pointer), so no load-through-pointer is performed here.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create a public, unnamed, non-bitfield field of type \p FieldTy and append
/// it to the record/context \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase unused pure declarations; defined or referenced globals
    // must stay.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts using FirstSeparator before the first part and Separator
/// between the remaining parts.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner (or initializer, per \p IsCombiner) function of
/// a declare-reduction construct, mapping \p In / \p Out onto the two restrict
/// pointer parameters and emitting \p CombinerInitializer (if any) as its
/// body expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Force inlining of the helper when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer function: emit omp_priv's own non-trivial initializer.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration - nothing to do.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        // Only call-style initializers are passed through as the body
        // expression; direct-init is handled via omp_priv's own initializer.
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs were emitted for this function so they can be
    // dropped again in functionFinished().
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Not emitted yet - emit lazily and look it up again.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the captured statement of a 'parallel'/'teams'-like directive into
/// a function named \p OutlinedHelperName, forwarding cancellation state of
/// the various parallel directive flavors.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the directive (in any of its parallel flavors) may be
  // cancelled.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task so that the
  // remaining parts can execute later.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether any of the task directive flavors may be cancelled.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the constants in \p Data as the fields of record \p RD to \p Fields,
/// padding skipped LLVM struct slots with null values.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD,
const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized with the field
/// constants in \p Data; extra arguments are forwarded to
/// finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and append it to
/// the enclosing aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Install a dummy "svcpt" instruction that marks where runtime-service calls
/// (e.g. the thread-id call) are inserted for the current function - either at
/// the current insertion point or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef serves as a stable placeholder instruction.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the placeholder installed by setLocThreadIdInsertPt, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Render \p Loc as the ";file;function;line;column;;" ident string used by
/// the OpenMP runtime, written into \p Buffer.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  // Without debug info (or a valid location) the default source-location
  // string is sufficient.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                        Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id parameter when it is safe w.r.t. EH: no
      // landing pads, or the load happens in the entry block / the block
      // holding the pointer instruction itself.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function caches (thread-id, UDRs, UDMs, lastprivate
/// conditionals, untied task stacks) kept for CGF.CurFn.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  // The type is built lazily and cached in Kmpc_MicroTy.
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Return the __kmpc_{distribute_,}for_static_init_{4,4u,8,8u} runtime entry
/// matching the induction-variable size/signedness and the GPU-distribute
/// flavor.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry for the given
/// induction-variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry for the given
/// induction-variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry for the given
/// induction-variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The presumed file (possibly renamed by a #line directive) is not on
    // disk; retry with line directives ignored before giving up.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Simd-only mode performs no device code generation.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'declare target link' variables, and 'to' variables under
  // 'requires unified_shared_memory', are accessed indirectly through a
  // generated "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr" pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables with the file's unique ID.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the weak reference pointer.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return (creating on first use) the internal cache variable consumed by
/// __kmpc_threadprivate_cached for the given threadprivate variable.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the variable itself is already per-thread.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  // Otherwise obtain the thread-local copy via
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming destination pointer argument.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
      CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone global
      // initializer and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do unless compiling for a device or offloading to one.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables, and 'to' variables under unified shared memory, go
  // through the reference-pointer mechanism instead of ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the offload entries only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a private dummy global is needed to serve as the
      // unique entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host only a private dummy global is needed to serve as the
      // unique entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS, simply mark the artificial global thread_local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a dedicated cache.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else over \p Cond, running \p ThenGen or \p ElseGen; constant
/// conditions fold away the dead arm entirely.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region, reuse the region's thread-ID variable.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the thread ID into a temporary and return its
  // address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Reuse an existing variable; every caller must request the same type.
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request: create a zero-initialized common-linkage global.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the named lock variable ("gomp_critical_user_<name>.var") backing a
/// 'critical' construct.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;   // runtime call emitted on region entry
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;    // runtime call emitted on region exit
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                   // guard the body on EnterCallee != 0
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only run the region body when the enter call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint variant takes one extra trailing argument.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // A missing 'filter' clause defaults to thread 0 (the primary thread).
  llvm::Value *FilterVal = Filter
                               ?
      CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Delegate to the OMPIRBuilder when it drives codegen.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit an internal "void copy_func(void *LHSArg, void *RHSArg)" helper that
/// assigns each copyprivate variable from the source pointer array to the
/// destination pointer array using the provided assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

// Emit a 'single' region: only one thread executes the body; if copyprivate
// clauses are present, broadcast the values produced there to all threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is forwarded into the callee's DestExprs slot and
    // DstExprs into its SrcExprs slot — the naming appears swapped relative to
    // emitCopyprivateCopyFunction's parameter list; verify against callers
    // before relying on the names.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

// Emit an 'ordered' region. Only the 'threads' form needs the runtime
// bracketing calls; otherwise the body is emitted inline as-is.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

// Map a directive kind to the ident_t barrier flag encoded in the location.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

// Emit a barrier. Cancellable regions use __kmpc_cancel_barrier and, when
// EmitChecks is set, branch to the construct's cancel destination on a
// non-zero result; otherwise a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ?
                 OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

// Fold the schedule-clause modifiers (monotonic/nonmonotonic/simd) into the
// runtime schedule value; M2 takes precedence over M1 when both set a
// monotonicity modifier.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the static-init path, not dispatch init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

// Shared helper that emits the __kmpc_for_static_init call for both
// worksharing-loop and distribute initialization.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ?
                                                        OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  // On AMDGCN/NVPTX device code, use the dedicated distribute-static entry
  // points.
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

// Fetch the next chunk of a dynamically scheduled loop; returns an i1 that is
// true while iterations remain.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value
                                               *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create a fresh entry unless one already exists.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

// Returns true if a not-yet-fully-registered entry exists for the given
// (device, file, parent, line) key; with IgnoreAddressId, address/ID presence
// is not checked.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address; only fill in a still-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

// Materialize one __tgt_offload_entry global describing an offload symbol and
// place it in the section the offload linker scans.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are stored indexed by their registration order so the emitted
  // list is deterministic.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the source manager's files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent funtion is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
continue; 3304 } 3305 // The vaiable has no definition - no need to add the entry. 3306 if (CE->getVarSize().isZero()) 3307 continue; 3308 break; 3309 } 3310 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3311 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3312 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3313 "Declaret target link address is set."); 3314 if (CGM.getLangOpts().OpenMPIsDevice) 3315 continue; 3316 if (!CE->getAddress()) { 3317 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3318 DiagnosticsEngine::Error, 3319 "Offloading entry for declare target variable is incorrect: the " 3320 "address is invalid."); 3321 CGM.getDiags().Report(DiagID); 3322 continue; 3323 } 3324 break; 3325 } 3326 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3327 CE->getVarSize().getQuantity(), Flags, 3328 CE->getLinkage()); 3329 } else { 3330 llvm_unreachable("Unsupported entry kind."); 3331 } 3332 } 3333 } 3334 3335 /// Loads all the offload entries information from the host IR 3336 /// metadata. 3337 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3338 // If we are in target mode, load the metadata from the host IR. This code has 3339 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 

  // Only meaningful for device compilations that were given a host IR file.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a throwaway context; we only read metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand accessors; the operand layout per kind must mirror
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind discriminator.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the LLVM type for kmp_routine_entry_t, the task entry-point
/// function pointer type, and cache both the QualType and the IR type.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Packed so the layout matches the runtime's definition exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundles the declarations involved in privatizing one variable in a
/// task-based region: the referencing expression, the original variable,
/// its private copy, and (for firstprivate) the per-element init variable.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor for local (untied) privates: only the original decl is known.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // True when constructed via the single-argument form above.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an omp allocate attribute that selects a
/// non-default allocator (i.e. the variable needs indirect allocation).
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

/// Builds the implicit record type holding all privatized variables of a
/// task, or returns nullptr when there are none.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate alignment attributes so field layout honors them.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the implicit record type mirroring the runtime's kmp_task_t
/// (with the extra taskloop fields when \p Kind is a taskloop directive).
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Builds the record { kmp_task_t task_data; .kmp_privates.t. privates; }
/// that the runtime allocates per task; the privates field is omitted when
/// there are no privatized variables.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature required by the runtime: (kmp_int32 gtid, kmp_task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // Pass the *address* of part_id, not its value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when the task has privatized variables;
  // otherwise pass a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally forward lb/ub/st/liter/reductions from kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime ignores the result; the entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the .omp_task_destructor. helper that runs non-trivial destructors
/// for the task's privatized fields. Same signature as the task entry.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record; push a
  // destroy cleanup for each field whose type needs destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its 1-based position in the argument
  // list; used below to pair record fields with out-parameters.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Locals use the same pointer adjustments as createPrivatesRecordDecl.
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // This trivial mapper should always be inlined in optimized builds.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    // Store the field's address through the corresponding out-parameter.
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's shareds block (source of
///        firstprivate copies); may be invalid when not needed.
/// \param TDBase LValue of the kmp_task_t_with_privates object to fill.
/// \param ForDup True when called from the task duplication function
///        (taskloops), which changes which initializers must be re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record's fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial constructions need to be
    // repeated; trivial inits were already done when the task was created.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source is the shareds block; re-derive alignment from the decl.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// Scans the task's privatized variables and reports whether any of them
/// needs non-trivial (constructor-based) initialization. Local privates are
/// skipped because they carry no user-visible initializer. Returns true as
/// soon as one firstprivate/private copy has a non-trivial CXXConstructExpr
/// initializer, which forces generation of a task duplication function.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a real constructor call that is not trivially copyable forces the
    // duplication function; trivial initializers are handled by plain memcpy.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
///
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the task record that
///        embeds the privates struct; used for both dst and src parameters.
/// \param Data Clause data; FirstprivateVars decides whether the source
///        task's shareds pointer must be loaded for copy-initialization.
/// \param WithLastIter If true, also copy the lastprivate-iteration flag from
///        the third (int) argument into the destination task record.
/// \return The newly created internal-linkage duplication function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature mirrors the runtime's task_dup entry:
  // (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    // The kmp_task_t record is the first field of the with-privates record.
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds, so load the
    // shareds pointer out of task_src rather than task_dst.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Reuse the common privates-initialization path in "duplication" mode.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// Local privates are ignored; for every other privatized variable the
/// original declaration's non-reference type is inspected, and any type with
/// a non-trivial destructor makes cleanup generation necessary.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  // NOTE(review): KmpTaskTWithPrivatesQTyRD is currently unused here; the
  // decision is made purely from the privates' original types.
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
4079 class OMPIteratorGeneratorScope final 4080 : public CodeGenFunction::OMPPrivateScope { 4081 CodeGenFunction &CGF; 4082 const OMPIteratorExpr *E = nullptr; 4083 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4084 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4085 OMPIteratorGeneratorScope() = delete; 4086 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4087 4088 public: 4089 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4090 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4091 if (!E) 4092 return; 4093 SmallVector<llvm::Value *, 4> Uppers; 4094 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4095 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4096 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4097 addPrivate(VD, [&CGF, VD]() { 4098 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4099 }); 4100 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4101 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4102 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4103 "counter.addr"); 4104 }); 4105 } 4106 Privatize(); 4107 4108 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4109 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4110 LValue CLVal = 4111 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4112 HelperData.CounterVD->getType()); 4113 // Counter = 0; 4114 CGF.EmitStoreOfScalar( 4115 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4116 CLVal); 4117 CodeGenFunction::JumpDest &ContDest = 4118 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4119 CodeGenFunction::JumpDest &ExitDest = 4120 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4121 // N = <number-of_iterations>; 4122 llvm::Value *N = Uppers[I]; 4123 // cont: 4124 // if (Counter < N) goto body; else goto exit; 4125 CGF.EmitBlock(ContDest.getBlock()); 4126 auto *CVal = 4127 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4128 llvm::Value *Cmp = 4129 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4130 ? CGF.Builder.CreateICmpSLT(CVal, N) 4131 : CGF.Builder.CreateICmpULT(CVal, N); 4132 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4133 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4134 // body: 4135 CGF.EmitBlock(BodyBB); 4136 // Iteri = Begini + Counter * Stepi; 4137 CGF.EmitIgnoredExpr(HelperData.Update); 4138 } 4139 } 4140 ~OMPIteratorGeneratorScope() { 4141 if (!E) 4142 return; 4143 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4144 // Counter = Counter + 1; 4145 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4146 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4147 // goto cont; 4148 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4149 // exit: 4150 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4151 } 4152 } 4153 }; 4154 } // namespace 4155 4156 static std::pair<llvm::Value *, llvm::Value *> 4157 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4158 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4159 llvm::Value *Addr; 4160 if (OASE) { 4161 const Expr *Base = OASE->getBase(); 4162 Addr = CGF.EmitScalarExpr(Base); 4163 } else { 4164 Addr = CGF.EmitLValue(E).getPointer(CGF); 4165 } 4166 llvm::Value *SizeVal; 4167 QualType Ty = E->getType(); 4168 if (OASE) { 4169 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4170 for (const Expr *SE : OASE->getDimensions()) { 4171 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4172 Sz = CGF.EmitScalarConversion( 4173 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4174 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4175 } 4176 } else if (const auto *ASE = 4177 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4178 LValue UpAddrLVal = 4179 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4180 Address UpAddrAddress = 
UpAddrLVal.getAddress(CGF); 4181 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4182 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4183 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4184 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4185 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4186 } else { 4187 SizeVal = CGF.getTypeSize(Ty); 4188 } 4189 return std::make_pair(Addr, SizeVal); 4190 } 4191 4192 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4193 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4194 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4195 if (KmpTaskAffinityInfoTy.isNull()) { 4196 RecordDecl *KmpAffinityInfoRD = 4197 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4198 KmpAffinityInfoRD->startDefinition(); 4199 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4200 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4201 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4202 KmpAffinityInfoRD->completeDefinition(); 4203 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4204 } 4205 } 4206 4207 CGOpenMPRuntime::TaskResultTy 4208 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4209 const OMPExecutableDirective &D, 4210 llvm::Function *TaskFunction, QualType SharedsTy, 4211 Address Shareds, const OMPTaskDataTy &Data) { 4212 ASTContext &C = CGM.getContext(); 4213 llvm::SmallVector<PrivateDataTy, 4> Privates; 4214 // Aggregate privates and sort them by the alignment. 
4215 const auto *I = Data.PrivateCopies.begin(); 4216 for (const Expr *E : Data.PrivateVars) { 4217 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4218 Privates.emplace_back( 4219 C.getDeclAlign(VD), 4220 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4221 /*PrivateElemInit=*/nullptr)); 4222 ++I; 4223 } 4224 I = Data.FirstprivateCopies.begin(); 4225 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4226 for (const Expr *E : Data.FirstprivateVars) { 4227 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4228 Privates.emplace_back( 4229 C.getDeclAlign(VD), 4230 PrivateHelpersTy( 4231 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4232 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4233 ++I; 4234 ++IElemInitRef; 4235 } 4236 I = Data.LastprivateCopies.begin(); 4237 for (const Expr *E : Data.LastprivateVars) { 4238 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4239 Privates.emplace_back( 4240 C.getDeclAlign(VD), 4241 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4242 /*PrivateElemInit=*/nullptr)); 4243 ++I; 4244 } 4245 for (const VarDecl *VD : Data.PrivateLocals) { 4246 if (isAllocatableDecl(VD)) 4247 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4248 else 4249 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4250 } 4251 llvm::stable_sort(Privates, 4252 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4253 return L.first > R.first; 4254 }); 4255 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4256 // Build type kmp_routine_entry_t (if not built yet). 4257 emitKmpRoutineEntryT(KmpInt32Ty); 4258 // Build type kmp_task_t (if not built yet). 
4259 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4260 if (SavedKmpTaskloopTQTy.isNull()) { 4261 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4262 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4263 } 4264 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4265 } else { 4266 assert((D.getDirectiveKind() == OMPD_task || 4267 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4268 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4269 "Expected taskloop, task or target directive"); 4270 if (SavedKmpTaskTQTy.isNull()) { 4271 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4272 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4273 } 4274 KmpTaskTQTy = SavedKmpTaskTQTy; 4275 } 4276 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4277 // Build particular struct kmp_task_t for the given task. 4278 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4279 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4280 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4281 QualType KmpTaskTWithPrivatesPtrQTy = 4282 C.getPointerType(KmpTaskTWithPrivatesQTy); 4283 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4284 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4285 KmpTaskTWithPrivatesTy->getPointerTo(); 4286 llvm::Value *KmpTaskTWithPrivatesTySize = 4287 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4288 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4289 4290 // Emit initial values for private copies (if any). 
4291 llvm::Value *TaskPrivatesMap = nullptr; 4292 llvm::Type *TaskPrivatesMapTy = 4293 std::next(TaskFunction->arg_begin(), 3)->getType(); 4294 if (!Privates.empty()) { 4295 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4296 TaskPrivatesMap = 4297 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); 4298 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4299 TaskPrivatesMap, TaskPrivatesMapTy); 4300 } else { 4301 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4302 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4303 } 4304 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4305 // kmp_task_t *tt); 4306 llvm::Function *TaskEntry = emitProxyTaskFunction( 4307 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4308 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4309 TaskPrivatesMap); 4310 4311 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4312 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4313 // kmp_routine_entry_t *task_entry); 4314 // Task flags. Format is taken from 4315 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, 4316 // description of kmp_tasking_flags struct. 4317 enum { 4318 TiedFlag = 0x1, 4319 FinalFlag = 0x2, 4320 DestructorsFlag = 0x8, 4321 PriorityFlag = 0x20, 4322 DetachableFlag = 0x40, 4323 }; 4324 unsigned Flags = Data.Tied ? TiedFlag : 0; 4325 bool NeedsCleanup = false; 4326 if (!Privates.empty()) { 4327 NeedsCleanup = 4328 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); 4329 if (NeedsCleanup) 4330 Flags = Flags | DestructorsFlag; 4331 } 4332 if (Data.Priority.getInt()) 4333 Flags = Flags | PriorityFlag; 4334 if (D.hasClausesOfKind<OMPDetachClause>()) 4335 Flags = Flags | DetachableFlag; 4336 llvm::Value *TaskFlags = 4337 Data.Final.getPointer() 4338 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4339 CGF.Builder.getInt32(FinalFlag), 4340 CGF.Builder.getInt32(/*C=*/0)) 4341 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4342 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4343 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4344 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4345 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4346 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4347 TaskEntry, KmpRoutineEntryPtrTy)}; 4348 llvm::Value *NewTask; 4349 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4350 // Check if we have any device clause associated with the directive. 4351 const Expr *Device = nullptr; 4352 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4353 Device = C->getDevice(); 4354 // Emit device ID if any otherwise use default value. 4355 llvm::Value *DeviceID; 4356 if (Device) 4357 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4358 CGF.Int64Ty, /*isSigned=*/true); 4359 else 4360 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4361 AllocArgs.push_back(DeviceID); 4362 NewTask = CGF.EmitRuntimeCall( 4363 OMPBuilder.getOrCreateRuntimeFunction( 4364 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4365 AllocArgs); 4366 } else { 4367 NewTask = 4368 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4369 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4370 AllocArgs); 4371 } 4372 // Emit detach clause initialization. 
4373 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4374 // task_descriptor); 4375 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4376 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4377 LValue EvtLVal = CGF.EmitLValue(Evt); 4378 4379 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4380 // int gtid, kmp_task_t *task); 4381 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4382 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4383 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4384 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4385 OMPBuilder.getOrCreateRuntimeFunction( 4386 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4387 {Loc, Tid, NewTask}); 4388 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4389 Evt->getExprLoc()); 4390 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4391 } 4392 // Process affinity clauses. 4393 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4394 // Process list of affinity data. 4395 ASTContext &C = CGM.getContext(); 4396 Address AffinitiesArray = Address::invalid(); 4397 // Calculate number of elements to form the array of affinity data. 4398 llvm::Value *NumOfElements = nullptr; 4399 unsigned NumAffinities = 0; 4400 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4401 if (const Expr *Modifier = C->getModifier()) { 4402 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4403 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4404 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4405 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4406 NumOfElements = 4407 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4408 } 4409 } else { 4410 NumAffinities += C->varlist_size(); 4411 } 4412 } 4413 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4414 // Fields ids in kmp_task_affinity_info record. 
4415 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4416 4417 QualType KmpTaskAffinityInfoArrayTy; 4418 if (NumOfElements) { 4419 NumOfElements = CGF.Builder.CreateNUWAdd( 4420 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4421 auto *OVE = new (C) OpaqueValueExpr( 4422 Loc, 4423 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4424 VK_PRValue); 4425 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4426 RValue::get(NumOfElements)); 4427 KmpTaskAffinityInfoArrayTy = 4428 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, 4429 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4430 // Properly emit variable-sized array. 4431 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4432 ImplicitParamDecl::Other); 4433 CGF.EmitVarDecl(*PD); 4434 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4435 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4436 /*isSigned=*/false); 4437 } else { 4438 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4439 KmpTaskAffinityInfoTy, 4440 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4441 ArrayType::Normal, /*IndexTypeQuals=*/0); 4442 AffinitiesArray = 4443 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4444 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4445 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4446 /*isSigned=*/false); 4447 } 4448 4449 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4450 // Fill array by elements without iterators. 
4451 unsigned Pos = 0; 4452 bool HasIterator = false; 4453 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4454 if (C->getModifier()) { 4455 HasIterator = true; 4456 continue; 4457 } 4458 for (const Expr *E : C->varlists()) { 4459 llvm::Value *Addr; 4460 llvm::Value *Size; 4461 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4462 LValue Base = 4463 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4464 KmpTaskAffinityInfoTy); 4465 // affs[i].base_addr = &<Affinities[i].second>; 4466 LValue BaseAddrLVal = CGF.EmitLValueForField( 4467 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4468 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4469 BaseAddrLVal); 4470 // affs[i].len = sizeof(<Affinities[i].second>); 4471 LValue LenLVal = CGF.EmitLValueForField( 4472 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4473 CGF.EmitStoreOfScalar(Size, LenLVal); 4474 ++Pos; 4475 } 4476 } 4477 LValue PosLVal; 4478 if (HasIterator) { 4479 PosLVal = CGF.MakeAddrLValue( 4480 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4481 C.getSizeType()); 4482 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4483 } 4484 // Process elements with iterators. 
4485 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4486 const Expr *Modifier = C->getModifier(); 4487 if (!Modifier) 4488 continue; 4489 OMPIteratorGeneratorScope IteratorScope( 4490 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4491 for (const Expr *E : C->varlists()) { 4492 llvm::Value *Addr; 4493 llvm::Value *Size; 4494 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4495 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4496 LValue Base = CGF.MakeAddrLValue( 4497 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); 4498 // affs[i].base_addr = &<Affinities[i].second>; 4499 LValue BaseAddrLVal = CGF.EmitLValueForField( 4500 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4501 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4502 BaseAddrLVal); 4503 // affs[i].len = sizeof(<Affinities[i].second>); 4504 LValue LenLVal = CGF.EmitLValueForField( 4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4506 CGF.EmitStoreOfScalar(Size, LenLVal); 4507 Idx = CGF.Builder.CreateNUWAdd( 4508 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4509 CGF.EmitStoreOfScalar(Idx, PosLVal); 4510 } 4511 } 4512 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4513 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4514 // naffins, kmp_task_affinity_info_t *affin_list); 4515 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4516 llvm::Value *GTid = getThreadID(CGF, Loc); 4517 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4518 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4519 // FIXME: Emit the function and ignore its result for now unless the 4520 // runtime function is properly implemented. 
4521 (void)CGF.EmitRuntimeCall( 4522 OMPBuilder.getOrCreateRuntimeFunction( 4523 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4524 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4525 } 4526 llvm::Value *NewTaskNewTaskTTy = 4527 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4528 NewTask, KmpTaskTWithPrivatesPtrTy); 4529 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4530 KmpTaskTWithPrivatesQTy); 4531 LValue TDBase = 4532 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4533 // Fill the data in the resulting kmp_task_t record. 4534 // Copy shareds if there are any. 4535 Address KmpTaskSharedsPtr = Address::invalid(); 4536 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4537 KmpTaskSharedsPtr = 4538 Address(CGF.EmitLoadOfScalar( 4539 CGF.EmitLValueForField( 4540 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4541 KmpTaskTShareds)), 4542 Loc), 4543 CGM.getNaturalTypeAlignment(SharedsTy)); 4544 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4545 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4546 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4547 } 4548 // Emit initial values for private copies (if any). 4549 TaskResultTy Result; 4550 if (!Privates.empty()) { 4551 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4552 SharedsTy, SharedsPtrTy, Data, Privates, 4553 /*ForDup=*/false); 4554 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4555 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4556 Result.TaskDupFn = emitTaskDupFunction( 4557 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4558 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4559 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4560 } 4561 } 4562 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4563 enum { Priority = 0, Destructors = 1 }; 4564 // Provide pointer to function with destructors for privates. 4565 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4566 const RecordDecl *KmpCmplrdataUD = 4567 (*FI)->getType()->getAsUnionType()->getDecl(); 4568 if (NeedsCleanup) { 4569 llvm::Value *DestructorFn = emitDestructorsFunction( 4570 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4571 KmpTaskTWithPrivatesQTy); 4572 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4573 LValue DestructorsLV = CGF.EmitLValueForField( 4574 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4575 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4576 DestructorFn, KmpRoutineEntryPtrTy), 4577 DestructorsLV); 4578 } 4579 // Set priority. 4580 if (Data.Priority.getInt()) { 4581 LValue Data2LV = CGF.EmitLValueForField( 4582 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4583 LValue PriorityLV = CGF.EmitLValueForField( 4584 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4585 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4586 } 4587 Result.NewTask = NewTask; 4588 Result.TaskEntry = TaskEntry; 4589 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4590 Result.TDBase = TDBase; 4591 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4592 return Result; 4593 } 4594 4595 namespace { 4596 /// Dependence kind for RTL. 4597 enum RTLDependenceKindTy { 4598 DepIn = 0x01, 4599 DepInOut = 0x3, 4600 DepMutexInOutSet = 0x4, 4601 DepInOutSet = 0x8 4602 }; 4603 /// Fields ids in kmp_depend_info record. 4604 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4605 } // namespace 4606 4607 /// Translates internal dependency kind into the runtime kind. 4608 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4609 RTLDependenceKindTy DepKind; 4610 switch (K) { 4611 case OMPC_DEPEND_in: 4612 DepKind = DepIn; 4613 break; 4614 // Out and InOut dependencies must use the same code. 
4615 case OMPC_DEPEND_out: 4616 case OMPC_DEPEND_inout: 4617 DepKind = DepInOut; 4618 break; 4619 case OMPC_DEPEND_mutexinoutset: 4620 DepKind = DepMutexInOutSet; 4621 break; 4622 case OMPC_DEPEND_inoutset: 4623 DepKind = DepInOutSet; 4624 break; 4625 case OMPC_DEPEND_source: 4626 case OMPC_DEPEND_sink: 4627 case OMPC_DEPEND_depobj: 4628 case OMPC_DEPEND_unknown: 4629 llvm_unreachable("Unknown task dependence type"); 4630 } 4631 return DepKind; 4632 } 4633 4634 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4635 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4636 QualType &FlagsTy) { 4637 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4638 if (KmpDependInfoTy.isNull()) { 4639 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4640 KmpDependInfoRD->startDefinition(); 4641 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4642 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4643 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4644 KmpDependInfoRD->completeDefinition(); 4645 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4646 } 4647 } 4648 4649 std::pair<llvm::Value *, LValue> 4650 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4651 SourceLocation Loc) { 4652 ASTContext &C = CGM.getContext(); 4653 QualType FlagsTy; 4654 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4655 RecordDecl *KmpDependInfoRD = 4656 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4657 LValue Base = CGF.EmitLoadOfPointerLValue( 4658 DepobjLVal.getAddress(CGF), 4659 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4660 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4661 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4662 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4663 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4664 Base.getTBAAInfo()); 4665 Address 
DepObjAddr = CGF.Builder.CreateGEP( 4666 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4667 LValue NumDepsBase = CGF.MakeAddrLValue( 4668 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); 4669 // NumDeps = deps[i].base_addr; 4670 LValue BaseAddrLVal = CGF.EmitLValueForField( 4671 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4672 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4673 return std::make_pair(NumDeps, Base); 4674 } 4675 4676 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4677 llvm::PointerUnion<unsigned *, LValue *> Pos, 4678 const OMPTaskDataTy::DependData &Data, 4679 Address DependenciesArray) { 4680 CodeGenModule &CGM = CGF.CGM; 4681 ASTContext &C = CGM.getContext(); 4682 QualType FlagsTy; 4683 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4684 RecordDecl *KmpDependInfoRD = 4685 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4686 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4687 4688 OMPIteratorGeneratorScope IteratorScope( 4689 CGF, cast_or_null<OMPIteratorExpr>( 4690 Data.IteratorExpr ? 
                 Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
  // Fill one kmp_depend_info record per dependency expression, advancing the
  // insertion position (either a compile-time counter or a runtime counter
  // held in memory).
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Position is a compile-time constant: address the slot directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Position is only known at runtime: load it and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the insertion position by one record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Compute, for each depobj dependency expression in \p Data, the number of
/// kmp_depend_info records that depobj holds. The count is read from the
/// base_addr field of the record immediately preceding the depobj's first
/// element (emitDepobjDependClause stores it there). Returns one size value
/// per dependency expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Scope the iterator variables (if any) to this loop only.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The record at index -1 carries the element count in base_addr.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a temporary so it can be re-loaded after the
      // iterator scope has been torn down.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copy the kmp_depend_info records of every depobj dependency in \p Data
/// into \p DependenciesArray, starting at the runtime position held in
/// \p PosLVal and advancing that position by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The record immediately before the depobj's first element stores the
      // element count in its base_addr field.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if every depend clause has an empty expression list.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count covers only plain (non-depobj, non-iterator)
  // dependencies; the other kinds contribute runtime counts below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        // Each iteration emits one record per dependency expression.
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size only known at runtime: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fixed number of dependencies: use a constant-sized stack array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Emit plain dependencies first, tracking the position statically.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Runtime position counter for the remaining (runtime-counted) groups.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator the element count is the product of all iterator trip
    // counts; it is only known at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading record that stores the element count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count; size the array statically (again +1 for the
    // counter record).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // The depobj array outlives this construct, so it is heap-allocated via the
  // OpenMP runtime and freed later by emitDestroyClause.
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  // Data records start at index 1 (index 0 is the counter record).
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the counter record, i.e. to the first data record.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // The allocation starts one record before the stored pointer (the counter
  // record), so step back by one element before freeing.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" branch of the if clause (or the unconditional path): enqueue the
  // task through the runtime, with or without dependences.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "else" branch of the if clause: execute the task body immediately
  // (undeferred), bracketed by task_begin_if0/task_complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // descriptor from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule kind: the PointerIntPair's int selects num_tasks vs
      // grainsize; a null pointer means no schedule clause was given.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element across loop
  // iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen
  // operates on a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Detect the UDR pattern: a call whose callee is an opaque value backed by
  // a DeclRefExpr to an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the UDR combiner function, then emit
          // the call expression with that binding in place.
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable to the corresponding slot of the incoming
  // pointer arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA sizes occupy an extra slot in the pointer array (see the callers
      // that build the reduction list).
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; arrays are reduced element by element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
5534 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5535 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5536 EmitOMPAggregateReduction( 5537 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5538 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5539 emitReductionCombiner(CGF, ReductionOp); 5540 }); 5541 } else { 5542 // Emit reduction for array subscript or single variable. 5543 emitReductionCombiner(CGF, ReductionOp); 5544 } 5545 } 5546 5547 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5548 ArrayRef<const Expr *> Privates, 5549 ArrayRef<const Expr *> LHSExprs, 5550 ArrayRef<const Expr *> RHSExprs, 5551 ArrayRef<const Expr *> ReductionOps, 5552 ReductionOptionsTy Options) { 5553 if (!CGF.HaveInsertPoint()) 5554 return; 5555 5556 bool WithNowait = Options.WithNowait; 5557 bool SimpleReduction = Options.SimpleReduction; 5558 5559 // Next code should be emitted for reduction: 5560 // 5561 // static kmp_critical_name lock = { 0 }; 5562 // 5563 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5564 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5565 // ... 5566 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5567 // *(Type<n>-1*)rhs[<n>-1]); 5568 // } 5569 // 5570 // ... 5571 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5572 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5573 // RedList, reduce_func, &<lock>)) { 5574 // case 1: 5575 // ... 5576 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5577 // ... 5578 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5579 // break; 5580 // case 2: 5581 // ... 5582 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5583 // ... 5584 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5585 // break; 5586 // default:; 5587 // } 5588 // 5589 // if SimpleReduction is true, only the next code is generated: 5590 // ... 
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime synchronization needed: emit the combiners inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The integer element count is smuggled through the
      // extra void* slot via inttoptr; reduce_func recovers it with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // CommonActionTy appends the __kmpc_end_reduce{_nowait} call after the
  // combiner body (no entry action is needed, hence nullptr/None).
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // For each item, try to lower the combiner as a simple atomic update; if
  // the expression shape does not permit that, fall back to a critical
  // region around the plain combiner.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Only a top-level assignment 'x = <update>' is a candidate for the
      // atomic form; anything else keeps XExpr null and takes the critical
      // path below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // For the generic update path, materialize the loaded value
                // of 'x' in a temporary and evaluate the update expression
                // with the LHS variable remapped onto that temporary.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/params keep their source name; globals use the mangled name so the
  // generated threadprivate name is unique across translation units.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and nullptr is stored in reduce_fini) when the
  // reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; //
  //   flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 tells the runtime to use lazy (delayed) allocation for
    // this item; 0 requests eager creation.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned pointer is assumed to share the alignment of the shared
  // reduction item it corresponds to.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence
      // info is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
6288 CGF.EmitRuntimeCall( 6289 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6290 Args); 6291 } 6292 } 6293 6294 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6295 Region->emitUntiedSwitch(CGF); 6296 } 6297 6298 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6299 OpenMPDirectiveKind InnerKind, 6300 const RegionCodeGenTy &CodeGen, 6301 bool HasCancel) { 6302 if (!CGF.HaveInsertPoint()) 6303 return; 6304 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6305 InnerKind != OMPD_critical && 6306 InnerKind != OMPD_master && 6307 InnerKind != OMPD_masked); 6308 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6309 } 6310 6311 namespace { 6312 enum RTCancelKind { 6313 CancelNoreq = 0, 6314 CancelParallel = 1, 6315 CancelLoop = 2, 6316 CancelSections = 3, 6317 CancelTaskgroup = 4 6318 }; 6319 } // anonymous namespace 6320 6321 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6322 RTCancelKind CancelKind = CancelNoreq; 6323 if (CancelRegion == OMPD_parallel) 6324 CancelKind = CancelParallel; 6325 else if (CancelRegion == OMPD_for) 6326 CancelKind = CancelLoop; 6327 else if (CancelRegion == OMPD_sections) 6328 CancelKind = CancelSections; 6329 else { 6330 assert(CancelRegion == OMPD_taskgroup); 6331 CancelKind = CancelTaskgroup; 6332 } 6333 return CancelKind; 6334 } 6335 6336 void CGOpenMPRuntime::emitCancellationPointCall( 6337 CodeGenFunction &CGF, SourceLocation Loc, 6338 OpenMPDirectiveKind CancelRegion) { 6339 if (!CGF.HaveInsertPoint()) 6340 return; 6341 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6342 // global_tid, kmp_int32 cncl_kind); 6343 if (auto *OMPRegionInfo = 6344 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6345 // For 'cancellation point taskgroup', the task region info may not have a 6346 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // The runtime returns nonzero iff cancellation was actually requested;
      // branch to the exit path in that case:
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The lambda is only invoked synchronously below (via emitIfClause or
    // RegionCodeGenTy), so capturing M and locals by reference is safe.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'cancel' with an if clause: only run ThenGen when the condition holds.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// Pre/post action that brackets a target region's code generation with
/// __kmpc_init_allocator / __kmpc_destroy_allocator calls for each allocator
/// listed in a uses_allocators clause.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      // first = allocator expression, second = allocator-traits expression.
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  // Collect (allocator, traits) pairs from uses_allocators clauses; only
  // allocators with traits need runtime initialization.
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE(review): this local 'D' shadows the directive parameter 'D'.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array size of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Pass the traits array as a void** to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // NOTE(review): the boolean ctor argument looks like it suppresses creating
  // a new context for the helper CGF — confirm against CodeGenFunction's ctor.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the region ID is a uniquely named constant byte.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Repeatedly peel compound statements, keeping the single "interesting"
  // child at each level; returns nullptr as soon as a level has more than one.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Type-level and pragma-like declarations generate no code.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals can also be ignored.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a bare 'target', look at the single nested directive (if any) to
    // decide how many teams the region implies.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          // Also record the constant value, when it folds to one.
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if
        (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // None of the remaining directive kinds is a target-based executable
  // directive; the assert above already ruled them out.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression came from a nested teams directive, so it
      // must be emitted in the captured-statement context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region needs to be emitted at all.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

static
llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                           llvm::Value *DefaultThreadLimitVal) {
  // Inspect the single nested directive (if any) of the captured statement to
  // derive a thread count; DefaultThreadLimitVal acts as an upper bound.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an unmodified or 'parallel'-modified if clause applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the default thread limit, when one is given.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ?
             DefaultThreadLimitVal
             : CGF.Builder.getInt32(0);
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      // Record the constant value too, when it folds to one.
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // A constant num_threads smaller than thread_limit wins.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // None of the remaining directive kinds is a target-based executable
  // directive; the assert above already ruled them out.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case
       OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a bare 'target', derive the thread count from the nested
    // directives, if a single one can be identified.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the nested directive's thread_limit in the captured context,
        // materializing any clause pre-init declarations first.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a non-distribute teams directive to its child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an unmodified or 'parallel'-modified if clause applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serialized parallel, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) when both exist.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive; the assert above already ruled them out.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case
OMPD_master_taskloop_simd: 7251 case OMPD_parallel_master_taskloop: 7252 case OMPD_parallel_master_taskloop_simd: 7253 case OMPD_requires: 7254 case OMPD_metadirective: 7255 case OMPD_unknown: 7256 break; 7257 default: 7258 break; 7259 } 7260 llvm_unreachable("Unsupported directive kind."); 7261 } 7262 7263 namespace { 7264 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7265 7266 // Utility to handle information from clauses associated with a given 7267 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7268 // It provides a convenient interface to obtain the information and generate 7269 // code for that information. 7270 class MappableExprsHandler { 7271 public: 7272 /// Values for bit flags used to specify the mapping type for 7273 /// offloading. 7274 enum OpenMPOffloadMappingFlags : uint64_t { 7275 /// No flags 7276 OMP_MAP_NONE = 0x0, 7277 /// Allocate memory on the device and move data from host to device. 7278 OMP_MAP_TO = 0x01, 7279 /// Allocate memory on the device and move data from device to host. 7280 OMP_MAP_FROM = 0x02, 7281 /// Always perform the requested mapping action on the element, even 7282 /// if it was already mapped before. 7283 OMP_MAP_ALWAYS = 0x04, 7284 /// Delete the element from the device environment, ignoring the 7285 /// current reference count associated with the element. 7286 OMP_MAP_DELETE = 0x08, 7287 /// The element being mapped is a pointer-pointee pair; both the 7288 /// pointer and the pointee should be mapped. 7289 OMP_MAP_PTR_AND_OBJ = 0x10, 7290 /// This flags signals that the base address of an entry should be 7291 /// passed to the target kernel as an argument. 7292 OMP_MAP_TARGET_PARAM = 0x20, 7293 /// Signal that the runtime library has to return the device pointer 7294 /// in the current position for the data being mapped. Used when we have the 7295 /// use_device_ptr or use_device_addr clause. 
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value (no pointer mapping).
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map (the entry was not written explicitly in a map clause).
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured. It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field, i.e. the number of bits
  /// the MEMBER_OF position value must be shifted left to land in the 16 MSBs
  /// of the flags. Computed as the number of trailing zero bits in
  /// OMP_MAP_MEMBER_OF.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    // Count trailing zeros of the MEMBER_OF mask.
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
7339 class MappingExprInfo { 7340 /// The variable declaration used for the data mapping. 7341 const ValueDecl *MapDecl = nullptr; 7342 /// The original expression used in the map clause, or null if there is 7343 /// none. 7344 const Expr *MapExpr = nullptr; 7345 7346 public: 7347 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7348 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7349 7350 const ValueDecl *getMapDecl() const { return MapDecl; } 7351 const Expr *getMapExpr() const { return MapExpr; } 7352 }; 7353 7354 /// Class that associates information with a base pointer to be passed to the 7355 /// runtime library. 7356 class BasePointerInfo { 7357 /// The base pointer. 7358 llvm::Value *Ptr = nullptr; 7359 /// The base declaration that refers to this device pointer, or null if 7360 /// there is none. 7361 const ValueDecl *DevPtrDecl = nullptr; 7362 7363 public: 7364 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7365 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7366 llvm::Value *operator*() const { return Ptr; } 7367 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7368 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7369 }; 7370 7371 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7372 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7373 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7374 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7375 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7376 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7377 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7378 7379 /// This structure contains combined information generated for mappable 7380 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7381 /// mappers, and non-contiguous information. 
7382 struct MapCombinedInfoTy { 7383 struct StructNonContiguousInfo { 7384 bool IsNonContiguous = false; 7385 MapDimArrayTy Dims; 7386 MapNonContiguousArrayTy Offsets; 7387 MapNonContiguousArrayTy Counts; 7388 MapNonContiguousArrayTy Strides; 7389 }; 7390 MapExprsArrayTy Exprs; 7391 MapBaseValuesArrayTy BasePointers; 7392 MapValuesArrayTy Pointers; 7393 MapValuesArrayTy Sizes; 7394 MapFlagsArrayTy Types; 7395 MapMappersArrayTy Mappers; 7396 StructNonContiguousInfo NonContigInfo; 7397 7398 /// Append arrays in \a CurInfo. 7399 void append(MapCombinedInfoTy &CurInfo) { 7400 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7401 BasePointers.append(CurInfo.BasePointers.begin(), 7402 CurInfo.BasePointers.end()); 7403 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7404 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7405 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7406 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7407 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7408 CurInfo.NonContigInfo.Dims.end()); 7409 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7410 CurInfo.NonContigInfo.Offsets.end()); 7411 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7412 CurInfo.NonContigInfo.Counts.end()); 7413 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7414 CurInfo.NonContigInfo.Strides.end()); 7415 } 7416 }; 7417 7418 /// Map between a struct and the its lowest & highest elements which have been 7419 /// mapped. 
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map entries gathered before the struct range was known; emitted once
    // the whole struct has been processed.
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Mapping information collected for a single component list of a map (or
  /// motion) clause: the components themselves plus the clause's type,
  /// modifiers, and bookkeeping flags.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the runtime must return the device pointer for this entry
    // (use_device_ptr / use_device_addr).
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    // User-defined mapper attached to this map item, if any.
    const ValueDecl *Mapper = nullptr;
    // Original clause expression, kept for diagnostics/debug info.
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Compute the size in bytes of the data referenced by \a E, as an
  /// llvm::Value of the target's size type. Handles array shaping
  /// expressions and array sections specially; everything else falls back to
  /// the size of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: element size times the
    // product of all dimensions.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimension expressions may have any integer type; normalize to
        // size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb past the end of the array: clamp the size to zero
      // instead of producing a huge unsigned value.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    // 'present' may arrive either as a map modifier or as a motion modifier
    // (to/from clauses); both set the same runtime bit.
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size other than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
7669 void generateInfoForComponentList( 7670 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7671 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7672 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7673 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7674 bool IsFirstComponentList, bool IsImplicit, 7675 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7676 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7677 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7678 OverlappedElements = llvm::None) const { 7679 // The following summarizes what has to be generated for each map and the 7680 // types below. The generated information is expressed in this order: 7681 // base pointer, section pointer, size, flags 7682 // (to add to the ones that come from the map type and modifier). 7683 // 7684 // double d; 7685 // int i[100]; 7686 // float *p; 7687 // 7688 // struct S1 { 7689 // int i; 7690 // float f[50]; 7691 // } 7692 // struct S2 { 7693 // int i; 7694 // float f[50]; 7695 // S1 s; 7696 // double *p; 7697 // struct S2 *ps; 7698 // int &ref; 7699 // } 7700 // S2 s; 7701 // S2 *ps; 7702 // 7703 // map(d) 7704 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7705 // 7706 // map(i) 7707 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7708 // 7709 // map(i[1:23]) 7710 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7711 // 7712 // map(p) 7713 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7714 // 7715 // map(p[1:24]) 7716 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7717 // in unified shared memory mode or for local pointers 7718 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7719 // 7720 // map(s) 7721 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7722 // 7723 // map(s.i) 7724 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7725 // 7726 // map(s.s.f) 7727 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7728 // 7729 // map(s.p) 7730 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7731 // 7732 // map(to: s.p[:22]) 7733 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7734 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7735 // &(s.p), &(s.p[0]), 22*sizeof(double), 7736 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7737 // (*) alloc space for struct members, only this is a target parameter 7738 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7739 // optimizes this entry out, same in the examples below) 7740 // (***) map the pointee (map: to) 7741 // 7742 // map(to: s.ref) 7743 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7744 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7745 // (*) alloc space for struct members, only this is a target parameter 7746 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7747 // optimizes this entry out, same in the examples below) 7748 // (***) map the pointee (map: to) 7749 // 7750 // map(s.ps) 7751 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7752 // 7753 // map(from: s.ps->s.i) 7754 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7755 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7756 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7757 // 7758 // map(to: s.ps->ps) 7759 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7760 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7761 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7762 // 7763 // map(s.ps->ps->ps) 7764 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7765 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7766 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7767 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7768 // 7769 // map(to: s.ps->ps->s.f[:22]) 7770 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7771 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7772 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7773 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7774 // 7775 // map(ps) 7776 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7777 // 7778 // map(ps->i) 7779 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7780 // 7781 // map(ps->s.f) 7782 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7783 // 7784 // map(from: ps->p) 7785 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7786 // 7787 // map(to: ps->p[:22]) 7788 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7789 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7790 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7791 // 7792 // map(ps->ps) 7793 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7794 // 7795 // map(from: ps->ps->s.i) 7796 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7797 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7798 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7799 // 7800 // map(from: ps->ps->ps) 7801 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7802 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7803 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7804 // 7805 // map(ps->ps->ps->ps) 7806 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7807 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7808 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7809 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7810 // 7811 // map(to: ps->ps->ps->s.f[:22]) 7812 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7813 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7814 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7815 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7816 // 7817 // map(to: s.f[:22]) map(from: s.p[:33]) 7818 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7819 // sizeof(double*) (**), TARGET_PARAM 7820 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7821 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7822 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7823 // (*) allocate contiguous space needed to fit all mapped members even if 7824 // we allocate space for members not mapped (in this example, 7825 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7826 // them as well because they fall between &s.f[0] and &s.p) 7827 // 7828 // map(from: s.f[:22]) map(to: ps->p[:33]) 7829 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7830 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7831 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7832 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7833 // (*) the struct this entry pertains to is the 2nd element in the list of 7834 // arguments, hence MEMBER_OF(2) 7835 // 7836 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7837 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7838 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7839 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7840 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7841 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7842 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7843 // (*) the struct this entry pertains to is the 4th element in the list 7844 // of arguments, hence MEMBER_OF(4) 7845 7846 // Track if the map information being generated is the first for a capture. 7847 bool IsCaptureFirstInfo = IsFirstComponentList; 7848 // When the variable is on a declare target link or in a to clause with 7849 // unified memory, a reference is needed to hold the host/device address 7850 // of the variable. 7851 bool RequiresReference = false; 7852 7853 // Scan the components from the base to the complete expression. 
7854 auto CI = Components.rbegin(); 7855 auto CE = Components.rend(); 7856 auto I = CI; 7857 7858 // Track if the map information being generated is the first for a list of 7859 // components. 7860 bool IsExpressionFirstInfo = true; 7861 bool FirstPointerInComplexData = false; 7862 Address BP = Address::invalid(); 7863 const Expr *AssocExpr = I->getAssociatedExpression(); 7864 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7865 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7866 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7867 7868 if (isa<MemberExpr>(AssocExpr)) { 7869 // The base is the 'this' pointer. The content of the pointer is going 7870 // to be the base of the field being mapped. 7871 BP = CGF.LoadCXXThisAddress(); 7872 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7873 (OASE && 7874 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7875 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7876 } else if (OAShE && 7877 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7878 BP = Address( 7879 CGF.EmitScalarExpr(OAShE->getBase()), 7880 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7881 } else { 7882 // The base is the reference to the variable. 7883 // BP = &Var. 7884 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7885 if (const auto *VD = 7886 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7887 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7888 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7889 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7890 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7891 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7892 RequiresReference = true; 7893 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7894 } 7895 } 7896 } 7897 7898 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7899 // the last component), the base has to be the pointer itself, not its 7900 // reference. References are ignored for mapping purposes. 7901 QualType Ty = 7902 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7903 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7904 // No need to generate individual map information for the pointer, it 7905 // can be associated with the combined storage if shared memory mode is 7906 // active or the base declaration is not global variable. 7907 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7908 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7909 !VD || VD->hasLocalStorage()) 7910 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7911 else 7912 FirstPointerInComplexData = true; 7913 ++I; 7914 } 7915 } 7916 7917 // Track whether a component of the list should be marked as MEMBER_OF some 7918 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7919 // in a component list should be marked as MEMBER_OF, all subsequent entries 7920 // do not belong to the base struct. E.g. 7921 // struct S2 s; 7922 // s.ps->ps->ps->f[:] 7923 // (1) (2) (3) (4) 7924 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7925 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7926 // is the pointee of ps(2) which is not member of struct s, so it should not 7927 // be marked as such (it is still PTR_AND_OBJ). 7928 // The variable is initialized to false so that PTR_AND_OBJ entries which 7929 // are not struct members are not considered (e.g. array of pointers to 7930 // data). 7931 bool ShouldBeMemberOf = false; 7932 7933 // Variable keeping track of whether or not we have encountered a component 7934 // in the component list which is a member expression. 
Useful when we have a 7935 // pointer or a final array section, in which case it is the previous 7936 // component in the list which tells us whether we have a member expression. 7937 // E.g. X.f[:] 7938 // While processing the final array section "[:]" it is "f" which tells us 7939 // whether we are dealing with a member of a declared struct. 7940 const MemberExpr *EncounteredME = nullptr; 7941 7942 // Track for the total number of dimension. Start from one for the dummy 7943 // dimension. 7944 uint64_t DimSize = 1; 7945 7946 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7947 bool IsPrevMemberReference = false; 7948 7949 for (; I != CE; ++I) { 7950 // If the current component is member of a struct (parent struct) mark it. 7951 if (!EncounteredME) { 7952 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7953 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7954 // as MEMBER_OF the parent struct. 7955 if (EncounteredME) { 7956 ShouldBeMemberOf = true; 7957 // Do not emit as complex pointer if this is actually not array-like 7958 // expression. 7959 if (FirstPointerInComplexData) { 7960 QualType Ty = std::prev(I) 7961 ->getAssociatedDeclaration() 7962 ->getType() 7963 .getNonReferenceType(); 7964 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7965 FirstPointerInComplexData = false; 7966 } 7967 } 7968 } 7969 7970 auto Next = std::next(I); 7971 7972 // We need to generate the addresses and sizes if this is the last 7973 // component, if the component is a pointer or if it is an array section 7974 // whose length can't be proved to be one. If this is a pointer, it 7975 // becomes the base address for the following components. 7976 7977 // A final array section, is one whose length can't be proved to be one. 7978 // If the map item is non-contiguous then we don't treat any array section 7979 // as final array section. 
7980 bool IsFinalArraySection = 7981 !IsNonContiguous && 7982 isFinalArraySectionExpression(I->getAssociatedExpression()); 7983 7984 // If we have a declaration for the mapping use that, otherwise use 7985 // the base declaration of the map clause. 7986 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7987 ? I->getAssociatedDeclaration() 7988 : BaseDecl; 7989 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7990 : MapExpr; 7991 7992 // Get information on whether the element is a pointer. Have to do a 7993 // special treatment for array sections given that they are built-in 7994 // types. 7995 const auto *OASE = 7996 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7997 const auto *OAShE = 7998 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7999 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 8000 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 8001 bool IsPointer = 8002 OAShE || 8003 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 8004 .getCanonicalType() 8005 ->isAnyPointerType()) || 8006 I->getAssociatedExpression()->getType()->isAnyPointerType(); 8007 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 8008 MapDecl && 8009 MapDecl->getType()->isLValueReferenceType(); 8010 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 8011 8012 if (OASE) 8013 ++DimSize; 8014 8015 if (Next == CE || IsMemberReference || IsNonDerefPointer || 8016 IsFinalArraySection) { 8017 // If this is not the last component, we expect the pointer to be 8018 // associated with an array expression or member expression. 
8019 assert((Next == CE || 8020 isa<MemberExpr>(Next->getAssociatedExpression()) || 8021 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 8022 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 8023 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 8024 isa<UnaryOperator>(Next->getAssociatedExpression()) || 8025 isa<BinaryOperator>(Next->getAssociatedExpression())) && 8026 "Unexpected expression"); 8027 8028 Address LB = Address::invalid(); 8029 Address LowestElem = Address::invalid(); 8030 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 8031 const MemberExpr *E) { 8032 const Expr *BaseExpr = E->getBase(); 8033 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 8034 // scalar. 8035 LValue BaseLV; 8036 if (E->isArrow()) { 8037 LValueBaseInfo BaseInfo; 8038 TBAAAccessInfo TBAAInfo; 8039 Address Addr = 8040 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 8041 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 8042 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 8043 } else { 8044 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 8045 } 8046 return BaseLV; 8047 }; 8048 if (OAShE) { 8049 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 8050 CGF.getContext().getTypeAlignInChars( 8051 OAShE->getBase()->getType())); 8052 } else if (IsMemberReference) { 8053 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 8054 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8055 LowestElem = CGF.EmitLValueForFieldInitialization( 8056 BaseLVal, cast<FieldDecl>(MapDecl)) 8057 .getAddress(CGF); 8058 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 8059 .getAddress(CGF); 8060 } else { 8061 LowestElem = LB = 8062 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 8063 .getAddress(CGF); 8064 } 8065 8066 // If this component is a pointer inside the base struct then we don't 8067 // need to create any entry for it - it will be combined with the object 8068 // it is 
pointing to into a single PTR_AND_OBJ entry. 8069 bool IsMemberPointerOrAddr = 8070 EncounteredME && 8071 (((IsPointer || ForDeviceAddr) && 8072 I->getAssociatedExpression() == EncounteredME) || 8073 (IsPrevMemberReference && !IsPointer) || 8074 (IsMemberReference && Next != CE && 8075 !Next->getAssociatedExpression()->getType()->isPointerType())); 8076 if (!OverlappedElements.empty() && Next == CE) { 8077 // Handle base element with the info for overlapped elements. 8078 assert(!PartialStruct.Base.isValid() && "The base element is set."); 8079 assert(!IsPointer && 8080 "Unexpected base element with the pointer type."); 8081 // Mark the whole struct as the struct that requires allocation on the 8082 // device. 8083 PartialStruct.LowestElem = {0, LowestElem}; 8084 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 8085 I->getAssociatedExpression()->getType()); 8086 Address HB = CGF.Builder.CreateConstGEP( 8087 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 8088 CGF.VoidPtrTy), 8089 TypeSize.getQuantity() - 1); 8090 PartialStruct.HighestElem = { 8091 std::numeric_limits<decltype( 8092 PartialStruct.HighestElem.first)>::max(), 8093 HB}; 8094 PartialStruct.Base = BP; 8095 PartialStruct.LB = LB; 8096 assert( 8097 PartialStruct.PreliminaryMapData.BasePointers.empty() && 8098 "Overlapped elements must be used only once for the variable."); 8099 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 8100 // Emit data for non-overlapped data. 8101 OpenMPOffloadMappingFlags Flags = 8102 OMP_MAP_MEMBER_OF | 8103 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 8104 /*AddPtrFlag=*/false, 8105 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 8106 llvm::Value *Size = nullptr; 8107 // Do bitcopy of all non-overlapped structure elements. 
8108 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 8109 Component : OverlappedElements) { 8110 Address ComponentLB = Address::invalid(); 8111 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 8112 Component) { 8113 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 8114 const auto *FD = dyn_cast<FieldDecl>(VD); 8115 if (FD && FD->getType()->isLValueReferenceType()) { 8116 const auto *ME = 8117 cast<MemberExpr>(MC.getAssociatedExpression()); 8118 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 8119 ComponentLB = 8120 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 8121 .getAddress(CGF); 8122 } else { 8123 ComponentLB = 8124 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 8125 .getAddress(CGF); 8126 } 8127 Size = CGF.Builder.CreatePtrDiff( 8128 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 8129 CGF.EmitCastToVoidPtr(LB.getPointer())); 8130 break; 8131 } 8132 } 8133 assert(Size && "Failed to determine structure size"); 8134 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8135 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8136 CombinedInfo.Pointers.push_back(LB.getPointer()); 8137 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8138 Size, CGF.Int64Ty, /*isSigned=*/true)); 8139 CombinedInfo.Types.push_back(Flags); 8140 CombinedInfo.Mappers.push_back(nullptr); 8141 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 8142 : 1); 8143 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 8144 } 8145 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8146 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8147 CombinedInfo.Pointers.push_back(LB.getPointer()); 8148 Size = CGF.Builder.CreatePtrDiff( 8149 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 8150 CGF.EmitCastToVoidPtr(LB.getPointer())); 8151 CombinedInfo.Sizes.push_back( 8152 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8153 CombinedInfo.Types.push_back(Flags); 8154 CombinedInfo.Mappers.push_back(nullptr); 8155 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8156 : 1); 8157 break; 8158 } 8159 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 8160 if (!IsMemberPointerOrAddr || 8161 (Next == CE && MapType != OMPC_MAP_unknown)) { 8162 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 8163 CombinedInfo.BasePointers.push_back(BP.getPointer()); 8164 CombinedInfo.Pointers.push_back(LB.getPointer()); 8165 CombinedInfo.Sizes.push_back( 8166 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8167 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 8168 : 1); 8169 8170 // If Mapper is valid, the last component inherits the mapper. 8171 bool HasMapper = Mapper && Next == CE; 8172 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 8173 8174 // We need to add a pointer flag for each map that comes from the 8175 // same expression except for the first one. We also need to signal 8176 // this map is the first one that relates with the current capture 8177 // (there is a set of entries for each capture). 
8178 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8179 MapType, MapModifiers, MotionModifiers, IsImplicit, 8180 !IsExpressionFirstInfo || RequiresReference || 8181 FirstPointerInComplexData || IsMemberReference, 8182 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 8183 8184 if (!IsExpressionFirstInfo || IsMemberReference) { 8185 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8186 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8187 if (IsPointer || (IsMemberReference && Next != CE)) 8188 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8189 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8190 8191 if (ShouldBeMemberOf) { 8192 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8193 // should be later updated with the correct value of MEMBER_OF. 8194 Flags |= OMP_MAP_MEMBER_OF; 8195 // From now on, all subsequent PTR_AND_OBJ entries should not be 8196 // marked as MEMBER_OF. 8197 ShouldBeMemberOf = false; 8198 } 8199 } 8200 8201 CombinedInfo.Types.push_back(Flags); 8202 } 8203 8204 // If we have encountered a member expression so far, keep track of the 8205 // mapped member. If the parent is "*this", then the value declaration 8206 // is nullptr. 
8207 if (EncounteredME) { 8208 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8209 unsigned FieldIndex = FD->getFieldIndex(); 8210 8211 // Update info about the lowest and highest elements for this struct 8212 if (!PartialStruct.Base.isValid()) { 8213 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8214 if (IsFinalArraySection) { 8215 Address HB = 8216 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8217 .getAddress(CGF); 8218 PartialStruct.HighestElem = {FieldIndex, HB}; 8219 } else { 8220 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8221 } 8222 PartialStruct.Base = BP; 8223 PartialStruct.LB = BP; 8224 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8225 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8226 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8227 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8228 } 8229 } 8230 8231 // Need to emit combined struct for array sections. 8232 if (IsFinalArraySection || IsNonContiguous) 8233 PartialStruct.IsArraySection = true; 8234 8235 // If we have a final array section, we are done with this expression. 8236 if (IsFinalArraySection) 8237 break; 8238 8239 // The pointer becomes the base for the next element. 8240 if (Next != CE) 8241 BP = IsMemberReference ? LowestElem : LB; 8242 8243 IsExpressionFirstInfo = false; 8244 IsCaptureFirstInfo = false; 8245 FirstPointerInComplexData = false; 8246 IsPrevMemberReference = IsMemberReference; 8247 } else if (FirstPointerInComplexData) { 8248 QualType Ty = Components.rbegin() 8249 ->getAssociatedDeclaration() 8250 ->getType() 8251 .getNonReferenceType(); 8252 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8253 FirstPointerInComplexData = false; 8254 } 8255 } 8256 // If ran into the whole component - allocate the space for the whole 8257 // record. 
8258 if (!EncounteredME) 8259 PartialStruct.HasCompleteRecord = true; 8260 8261 if (!IsNonContiguous) 8262 return; 8263 8264 const ASTContext &Context = CGF.getContext(); 8265 8266 // For supporting stride in array section, we need to initialize the first 8267 // dimension size as 1, first offset as 0, and first count as 1 8268 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8269 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8270 MapValuesArrayTy CurStrides; 8271 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8272 uint64_t ElementTypeSize; 8273 8274 // Collect Size information for each dimension and get the element size as 8275 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8276 // should be [10, 10] and the first stride is 4 btyes. 8277 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8278 Components) { 8279 const Expr *AssocExpr = Component.getAssociatedExpression(); 8280 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8281 8282 if (!OASE) 8283 continue; 8284 8285 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8286 auto *CAT = Context.getAsConstantArrayType(Ty); 8287 auto *VAT = Context.getAsVariableArrayType(Ty); 8288 8289 // We need all the dimension size except for the last dimension. 8290 assert((VAT || CAT || &Component == &*Components.begin()) && 8291 "Should be either ConstantArray or VariableArray if not the " 8292 "first Component"); 8293 8294 // Get element size if CurStrides is empty. 
8295 if (CurStrides.empty()) { 8296 const Type *ElementType = nullptr; 8297 if (CAT) 8298 ElementType = CAT->getElementType().getTypePtr(); 8299 else if (VAT) 8300 ElementType = VAT->getElementType().getTypePtr(); 8301 else 8302 assert(&Component == &*Components.begin() && 8303 "Only expect pointer (non CAT or VAT) when this is the " 8304 "first Component"); 8305 // If ElementType is null, then it means the base is a pointer 8306 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8307 // for next iteration. 8308 if (ElementType) { 8309 // For the case that having pointer as base, we need to remove one 8310 // level of indirection. 8311 if (&Component != &*Components.begin()) 8312 ElementType = ElementType->getPointeeOrArrayElementType(); 8313 ElementTypeSize = 8314 Context.getTypeSizeInChars(ElementType).getQuantity(); 8315 CurStrides.push_back( 8316 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8317 } 8318 } 8319 // Get dimension value except for the last dimension since we don't need 8320 // it. 8321 if (DimSizes.size() < Components.size() - 1) { 8322 if (CAT) 8323 DimSizes.push_back(llvm::ConstantInt::get( 8324 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8325 else if (VAT) 8326 DimSizes.push_back(CGF.Builder.CreateIntCast( 8327 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8328 /*IsSigned=*/false)); 8329 } 8330 } 8331 8332 // Skip the dummy dimension since we have already have its information. 8333 auto DI = DimSizes.begin() + 1; 8334 // Product of dimension. 8335 llvm::Value *DimProd = 8336 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8337 8338 // Collect info for non-contiguous. Notice that offset, count, and stride 8339 // are only meaningful for array-section, so we insert a null for anything 8340 // other than array-section. 8341 // Also, the size of offset, count, and stride are not the same as 8342 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8343 // count, and stride are the same as the number of non-contiguous 8344 // declaration in target update to/from clause. 8345 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8346 Components) { 8347 const Expr *AssocExpr = Component.getAssociatedExpression(); 8348 8349 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8350 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8351 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8352 /*isSigned=*/false); 8353 CurOffsets.push_back(Offset); 8354 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8355 CurStrides.push_back(CurStrides.back()); 8356 continue; 8357 } 8358 8359 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8360 8361 if (!OASE) 8362 continue; 8363 8364 // Offset 8365 const Expr *OffsetExpr = OASE->getLowerBound(); 8366 llvm::Value *Offset = nullptr; 8367 if (!OffsetExpr) { 8368 // If offset is absent, then we just set it to zero. 8369 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8370 } else { 8371 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8372 CGF.Int64Ty, 8373 /*isSigned=*/false); 8374 } 8375 CurOffsets.push_back(Offset); 8376 8377 // Count 8378 const Expr *CountExpr = OASE->getLength(); 8379 llvm::Value *Count = nullptr; 8380 if (!CountExpr) { 8381 // In Clang, once a high dimension is an array section, we construct all 8382 // the lower dimension as array section, however, for case like 8383 // arr[0:2][2], Clang construct the inner dimension as an array section 8384 // but it actually is not in an array section form according to spec. 8385 if (!OASE->getColonLocFirst().isValid() && 8386 !OASE->getColonLocSecond().isValid()) { 8387 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8388 } else { 8389 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8390 // When the length is absent it defaults to ⌈(size − 8391 // lower-bound)/stride⌉, where size is the size of the array 8392 // dimension. 8393 const Expr *StrideExpr = OASE->getStride(); 8394 llvm::Value *Stride = 8395 StrideExpr 8396 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8397 CGF.Int64Ty, /*isSigned=*/false) 8398 : nullptr; 8399 if (Stride) 8400 Count = CGF.Builder.CreateUDiv( 8401 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8402 else 8403 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8404 } 8405 } else { 8406 Count = CGF.EmitScalarExpr(CountExpr); 8407 } 8408 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8409 CurCounts.push_back(Count); 8410 8411 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8412 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8413 // Offset Count Stride 8414 // D0 0 1 4 (int) <- dummy dimension 8415 // D1 0 2 8 (2 * (1) * 4) 8416 // D2 1 2 20 (1 * (1 * 5) * 4) 8417 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8418 const Expr *StrideExpr = OASE->getStride(); 8419 llvm::Value *Stride = 8420 StrideExpr 8421 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8422 CGF.Int64Ty, /*isSigned=*/false) 8423 : nullptr; 8424 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8425 if (Stride) 8426 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8427 else 8428 CurStrides.push_back(DimProd); 8429 if (DI != DimSizes.end()) 8430 ++DI; 8431 } 8432 8433 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8434 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8435 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8436 } 8437 8438 /// Return the adjusted map modifiers if the declaration a capture refers to 8439 /// appears in a first-private clause. This is expected to be used only with 8440 /// directives that start with 'target'. 
8441 MappableExprsHandler::OpenMPOffloadMappingFlags
8442 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8443 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8444
8445 // A first private variable captured by reference will use only the
8446 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8447 // declaration is known as first-private in this handler.
8448 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
// A first-private pointer is transferred TO the device as a
// pointer/pointee pair...
8449 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8450 return MappableExprsHandler::OMP_MAP_TO |
8451 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
// ...while any other first-private capture is privatized and copied to
// the device.
8452 return MappableExprsHandler::OMP_MAP_PRIVATE |
8453 MappableExprsHandler::OMP_MAP_TO;
8454 }
8455 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8456 if (I != LambdasMap.end())
8457 // for map(to: lambda): using user specified map type.
8458 return getMapTypeBits(
8459 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8460 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8461 /*AddPtrFlag=*/false,
8462 /*AddIsTargetParamFlag=*/false,
8463 /*isNonContiguous=*/false);
// Captures that are neither first-private nor tracked lambdas default to
// a 'tofrom' mapping.
8464 return MappableExprsHandler::OMP_MAP_TO |
8465 MappableExprsHandler::OMP_MAP_FROM;
8466 }
8467
// Build the MEMBER_OF bits identifying the parent entry at index \p Position
// in the combined-entries list. The stored value is Position + 1 shifted into
// the MEMBER_OF bit-field — NOTE(review): presumably the +1 reserves zero to
// mean "no parent"; confirm against the offload runtime's flag encoding.
8468 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8469 // Rotate by getFlagMemberOffset() bits.
8470 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8471 << getFlagMemberOffset());
8472 }
8473
// Replace the MEMBER_OF placeholder in \p Flags with the concrete value in
// \p MemberOfFlag. PTR_AND_OBJ entries that do not carry the placeholder are
// deliberately left untouched.
8474 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8475 OpenMPOffloadMappingFlags MemberOfFlag) {
8476 // If the entry is PTR_AND_OBJ but has not been marked with the special
8477 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8478 // marked as MEMBER_OF.
8479 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8480 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8481 return;
8482
8483 // Reset the placeholder value to prepare the flag for the assignment of the
8484 // proper MEMBER_OF value.
8485 Flags &= ~OMP_MAP_MEMBER_OF;
8486 Flags |= MemberOfFlag;
8487 }
8488
// Flatten the LLVM-level layout of \p RD into \p Layout as a sequence of
// FieldDecls, emitted in LLVM struct-field order: non-virtual non-empty
// bases (recursively flattened), virtual bases whose slot is not already
// occupied, and the record's own non-bitfield, non-zero-size fields.
// \p AsBase selects the base-subobject LLVM type instead of the
// complete-object type for the slot count.
8489 void getPlainLayout(const CXXRecordDecl *RD,
8490 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8491 bool AsBase) const {
8492 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8493
8494 llvm::StructType *St =
8495 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8496
8497 unsigned NumElements = St->getNumElements();
// Sparse table with one slot per LLVM struct field; each slot holds either
// a base class or a field and is walked in field order at the end.
8498 llvm::SmallVector<
8499 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8500 RecordLayout(NumElements);
8501
8502 // Fill bases.
8503 for (const auto &I : RD->bases()) {
8504 if (I.isVirtual())
8505 continue;
8506 const auto *Base = I.getType()->getAsCXXRecordDecl();
8507 // Ignore empty bases.
8508 if (Base->isEmpty() || CGF.getContext()
8509 .getASTRecordLayout(Base)
8510 .getNonVirtualSize()
8511 .isZero())
8512 continue;
8513
8514 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8515 RecordLayout[FieldIndex] = Base;
8516 }
8517 // Fill in virtual bases.
8518 for (const auto &I : RD->vbases()) {
8519 const auto *Base = I.getType()->getAsCXXRecordDecl();
8520 // Ignore empty bases.
8521 if (Base->isEmpty())
8522 continue;
8523 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
// The slot may already have been claimed above; keep the existing entry.
8524 if (RecordLayout[FieldIndex])
8525 continue;
8526 RecordLayout[FieldIndex] = Base;
8527 }
8528 // Fill in all the fields.
8529 assert(!RD->isUnion() && "Unexpected union.");
8530 for (const auto *Field : RD->fields()) {
8531 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8532 // will fill in later.)
8533 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8534 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8535 RecordLayout[FieldIndex] = Field;
8536 }
8537 }
// Walk the collected slots in LLVM field order, recursing into bases and
// appending plain fields.
8538 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8539 &Data : RecordLayout) {
8540 if (Data.isNull())
8541 continue;
8542 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8543 getPlainLayout(Base, Layout, /*AsBase=*/true);
8544 else
8545 Layout.push_back(Data.get<const FieldDecl *>());
8546 }
8547 }
8548
8549 /// Generate all the base pointers, section pointers, sizes, map types, and
8550 /// mappers for the extracted mappable expressions (all included in \a
8551 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8552 /// pair of the relevant declaration and index where it occurs is appended to
8553 /// the device pointers info array.
8554 void generateAllInfoForClauses(
8555 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8556 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8557 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8558 // We have to process the component lists that relate with the same
8559 // declaration in a single chunk so that we can generate the map flags
8560 // correctly. Therefore, we organize all lists in a map.
8561 enum MapKind { Present, Allocs, Other, Total };
8562 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8563 SmallVector<SmallVector<MapInfo, 8>, 4>>
8564 Info;
8565
8566 // Helper function to fill the information map for the different supported
8567 // clauses.
8568 auto &&InfoGen = 8569 [&Info, &SkipVarSet]( 8570 const ValueDecl *D, MapKind Kind, 8571 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8572 OpenMPMapClauseKind MapType, 8573 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8574 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8575 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8576 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8577 if (SkipVarSet.contains(D)) 8578 return; 8579 auto It = Info.find(D); 8580 if (It == Info.end()) 8581 It = Info 8582 .insert(std::make_pair( 8583 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8584 .first; 8585 It->second[Kind].emplace_back( 8586 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8587 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8588 }; 8589 8590 for (const auto *Cl : Clauses) { 8591 const auto *C = dyn_cast<OMPMapClause>(Cl); 8592 if (!C) 8593 continue; 8594 MapKind Kind = Other; 8595 if (llvm::is_contained(C->getMapTypeModifiers(), 8596 OMPC_MAP_MODIFIER_present)) 8597 Kind = Present; 8598 else if (C->getMapType() == OMPC_MAP_alloc) 8599 Kind = Allocs; 8600 const auto *EI = C->getVarRefs().begin(); 8601 for (const auto L : C->component_lists()) { 8602 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8603 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8604 C->getMapTypeModifiers(), llvm::None, 8605 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8606 E); 8607 ++EI; 8608 } 8609 } 8610 for (const auto *Cl : Clauses) { 8611 const auto *C = dyn_cast<OMPToClause>(Cl); 8612 if (!C) 8613 continue; 8614 MapKind Kind = Other; 8615 if (llvm::is_contained(C->getMotionModifiers(), 8616 OMPC_MOTION_MODIFIER_present)) 8617 Kind = Present; 8618 const auto *EI = C->getVarRefs().begin(); 8619 for (const auto L : C->component_lists()) { 8620 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8621 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8622 C->isImplicit(), std::get<2>(L), *EI); 8623 ++EI; 8624 } 8625 } 8626 for (const auto *Cl : Clauses) { 8627 const auto *C = dyn_cast<OMPFromClause>(Cl); 8628 if (!C) 8629 continue; 8630 MapKind Kind = Other; 8631 if (llvm::is_contained(C->getMotionModifiers(), 8632 OMPC_MOTION_MODIFIER_present)) 8633 Kind = Present; 8634 const auto *EI = C->getVarRefs().begin(); 8635 for (const auto L : C->component_lists()) { 8636 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8637 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8638 C->isImplicit(), std::get<2>(L), *EI); 8639 ++EI; 8640 } 8641 } 8642 8643 // Look at the use_device_ptr clause information and mark the existing map 8644 // entries as such. If there is no map information for an entry in the 8645 // use_device_ptr list, we create one with map type 'alloc' and zero size 8646 // section. It is the user fault if that was not mapped before. If there is 8647 // no map information and the pointer is a struct member, then we defer the 8648 // emission of that entry until the whole struct has been processed. 
8649 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8650 SmallVector<DeferredDevicePtrEntryTy, 4>> 8651 DeferredInfo; 8652 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8653 8654 for (const auto *Cl : Clauses) { 8655 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8656 if (!C) 8657 continue; 8658 for (const auto L : C->component_lists()) { 8659 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8660 std::get<1>(L); 8661 assert(!Components.empty() && 8662 "Not expecting empty list of components!"); 8663 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8664 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8665 const Expr *IE = Components.back().getAssociatedExpression(); 8666 // If the first component is a member expression, we have to look into 8667 // 'this', which maps to null in the map of map information. Otherwise 8668 // look directly for the information. 8669 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8670 8671 // We potentially have map information for this declaration already. 8672 // Look for the first set of components that refer to it. 8673 if (It != Info.end()) { 8674 bool Found = false; 8675 for (auto &Data : It->second) { 8676 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8677 return MI.Components.back().getAssociatedDeclaration() == VD; 8678 }); 8679 // If we found a map entry, signal that the pointer has to be 8680 // returned and move on to the next declaration. Exclude cases where 8681 // the base pointer is mapped as array subscript, array section or 8682 // array shaping. The base address is passed as a pointer to base in 8683 // this case and cannot be used as a base for use_device_ptr list 8684 // item. 
8685 if (CI != Data.end()) { 8686 auto PrevCI = std::next(CI->Components.rbegin()); 8687 const auto *VarD = dyn_cast<VarDecl>(VD); 8688 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8689 isa<MemberExpr>(IE) || 8690 !VD->getType().getNonReferenceType()->isPointerType() || 8691 PrevCI == CI->Components.rend() || 8692 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8693 VarD->hasLocalStorage()) { 8694 CI->ReturnDevicePointer = true; 8695 Found = true; 8696 break; 8697 } 8698 } 8699 } 8700 if (Found) 8701 continue; 8702 } 8703 8704 // We didn't find any match in our map information - generate a zero 8705 // size array section - if the pointer is a struct member we defer this 8706 // action until the whole struct has been processed. 8707 if (isa<MemberExpr>(IE)) { 8708 // Insert the pointer into Info to be processed by 8709 // generateInfoForComponentList. Because it is a member pointer 8710 // without a pointee, no entry will be generated for it, therefore 8711 // we need to generate one after the whole struct has been processed. 8712 // Nonetheless, generateInfoForComponentList must be called to take 8713 // the pointer into account for the calculation of the range of the 8714 // partial struct. 
8715 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8716 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8717 nullptr); 8718 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8719 } else { 8720 llvm::Value *Ptr = 8721 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8722 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8723 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8724 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8725 UseDevicePtrCombinedInfo.Sizes.push_back( 8726 llvm::Constant::getNullValue(CGF.Int64Ty)); 8727 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8728 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8729 } 8730 } 8731 } 8732 8733 // Look at the use_device_addr clause information and mark the existing map 8734 // entries as such. If there is no map information for an entry in the 8735 // use_device_addr list, we create one with map type 'alloc' and zero size 8736 // section. It is the user fault if that was not mapped before. If there is 8737 // no map information and the pointer is a struct member, then we defer the 8738 // emission of that entry until the whole struct has been processed. 8739 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8740 for (const auto *Cl : Clauses) { 8741 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8742 if (!C) 8743 continue; 8744 for (const auto L : C->component_lists()) { 8745 assert(!std::get<1>(L).empty() && 8746 "Not expecting empty list of components!"); 8747 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8748 if (!Processed.insert(VD).second) 8749 continue; 8750 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8751 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8752 // If the first component is a member expression, we have to look into 8753 // 'this', which maps to null in the map of map information. 
Otherwise 8754 // look directly for the information. 8755 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8756 8757 // We potentially have map information for this declaration already. 8758 // Look for the first set of components that refer to it. 8759 if (It != Info.end()) { 8760 bool Found = false; 8761 for (auto &Data : It->second) { 8762 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8763 return MI.Components.back().getAssociatedDeclaration() == VD; 8764 }); 8765 // If we found a map entry, signal that the pointer has to be 8766 // returned and move on to the next declaration. 8767 if (CI != Data.end()) { 8768 CI->ReturnDevicePointer = true; 8769 Found = true; 8770 break; 8771 } 8772 } 8773 if (Found) 8774 continue; 8775 } 8776 8777 // We didn't find any match in our map information - generate a zero 8778 // size array section - if the pointer is a struct member we defer this 8779 // action until the whole struct has been processed. 8780 if (isa<MemberExpr>(IE)) { 8781 // Insert the pointer into Info to be processed by 8782 // generateInfoForComponentList. Because it is a member pointer 8783 // without a pointee, no entry will be generated for it, therefore 8784 // we need to generate one after the whole struct has been processed. 8785 // Nonetheless, generateInfoForComponentList must be called to take 8786 // the pointer into account for the calculation of the range of the 8787 // partial struct. 
8788 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8789 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8790 nullptr, nullptr, /*ForDeviceAddr=*/true); 8791 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8792 } else { 8793 llvm::Value *Ptr; 8794 if (IE->isGLValue()) 8795 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8796 else 8797 Ptr = CGF.EmitScalarExpr(IE); 8798 CombinedInfo.Exprs.push_back(VD); 8799 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8800 CombinedInfo.Pointers.push_back(Ptr); 8801 CombinedInfo.Sizes.push_back( 8802 llvm::Constant::getNullValue(CGF.Int64Ty)); 8803 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8804 CombinedInfo.Mappers.push_back(nullptr); 8805 } 8806 } 8807 } 8808 8809 for (const auto &Data : Info) { 8810 StructRangeInfoTy PartialStruct; 8811 // Temporary generated information. 8812 MapCombinedInfoTy CurInfo; 8813 const Decl *D = Data.first; 8814 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8815 for (const auto &M : Data.second) { 8816 for (const MapInfo &L : M) { 8817 assert(!L.Components.empty() && 8818 "Not expecting declaration with no component lists."); 8819 8820 // Remember the current base pointer index. 8821 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8822 CurInfo.NonContigInfo.IsNonContiguous = 8823 L.Components.back().isNonContiguous(); 8824 generateInfoForComponentList( 8825 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8826 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8827 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8828 8829 // If this entry relates with a device pointer, set the relevant 8830 // declaration and add the 'return pointer' flag. 
8831 if (L.ReturnDevicePointer) { 8832 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8833 "Unexpected number of mapped base pointers."); 8834 8835 const ValueDecl *RelevantVD = 8836 L.Components.back().getAssociatedDeclaration(); 8837 assert(RelevantVD && 8838 "No relevant declaration related with device pointer??"); 8839 8840 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8841 RelevantVD); 8842 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8843 } 8844 } 8845 } 8846 8847 // Append any pending zero-length pointers which are struct members and 8848 // used with use_device_ptr or use_device_addr. 8849 auto CI = DeferredInfo.find(Data.first); 8850 if (CI != DeferredInfo.end()) { 8851 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8852 llvm::Value *BasePtr; 8853 llvm::Value *Ptr; 8854 if (L.ForDeviceAddr) { 8855 if (L.IE->isGLValue()) 8856 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8857 else 8858 Ptr = this->CGF.EmitScalarExpr(L.IE); 8859 BasePtr = Ptr; 8860 // Entry is RETURN_PARAM. Also, set the placeholder value 8861 // MEMBER_OF=FFFF so that the entry is later updated with the 8862 // correct value of MEMBER_OF. 8863 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8864 } else { 8865 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8866 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8867 L.IE->getExprLoc()); 8868 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8869 // placeholder value MEMBER_OF=FFFF so that the entry is later 8870 // updated with the correct value of MEMBER_OF. 
8871 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8872 OMP_MAP_MEMBER_OF); 8873 } 8874 CurInfo.Exprs.push_back(L.VD); 8875 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8876 CurInfo.Pointers.push_back(Ptr); 8877 CurInfo.Sizes.push_back( 8878 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8879 CurInfo.Mappers.push_back(nullptr); 8880 } 8881 } 8882 // If there is an entry in PartialStruct it means we have a struct with 8883 // individual members mapped. Emit an extra combined entry. 8884 if (PartialStruct.Base.isValid()) { 8885 CurInfo.NonContigInfo.Dims.push_back(0); 8886 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8887 } 8888 8889 // We need to append the results of this capture to what we already 8890 // have. 8891 CombinedInfo.append(CurInfo); 8892 } 8893 // Append data for use_device_ptr clauses. 8894 CombinedInfo.append(UseDevicePtrCombinedInfo); 8895 } 8896 8897 public: 8898 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8899 : CurDir(&Dir), CGF(CGF) { 8900 // Extract firstprivate clause information. 8901 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8902 for (const auto *D : C->varlists()) 8903 FirstPrivateDecls.try_emplace( 8904 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8905 // Extract implicit firstprivates from uses_allocators clauses. 
8906 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8907 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8908 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8909 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8910 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8911 /*Implicit=*/true); 8912 else if (const auto *VD = dyn_cast<VarDecl>( 8913 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8914 ->getDecl())) 8915 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8916 } 8917 } 8918 // Extract device pointer clause information. 8919 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8920 for (auto L : C->component_lists()) 8921 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8922 // Extract map information. 8923 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8924 if (C->getMapType() != OMPC_MAP_to) 8925 continue; 8926 for (auto L : C->component_lists()) { 8927 const ValueDecl *VD = std::get<0>(L); 8928 const auto *RD = VD ? VD->getType() 8929 .getCanonicalType() 8930 .getNonReferenceType() 8931 ->getAsCXXRecordDecl() 8932 : nullptr; 8933 if (RD && RD->isLambda()) 8934 LambdasMap.try_emplace(std::get<0>(L), C); 8935 } 8936 } 8937 } 8938 8939 /// Constructor for the declare mapper directive. 8940 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8941 : CurDir(&Dir), CGF(CGF) {} 8942 8943 /// Generate code for the combined entry if we have a partially mapped struct 8944 /// and take care of the mapping flags of the arguments corresponding to 8945 /// individual struct members. 
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a member of a larger struct nor an
    // array section needs no combined parent entry: there is nothing to
    // group under it.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, the combined entry covers the record
    // itself rather than the lowest..highest member range.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element),
    // computed as a byte difference between the two void* casts.
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry is
    // now the kernel argument for this struct.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
9020 void generateAllInfo( 9021 MapCombinedInfoTy &CombinedInfo, 9022 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 9023 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 9024 assert(CurDir.is<const OMPExecutableDirective *>() && 9025 "Expect a executable directive"); 9026 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9027 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 9028 } 9029 9030 /// Generate all the base pointers, section pointers, sizes, map types, and 9031 /// mappers for the extracted map clauses of user-defined mapper (all included 9032 /// in \a CombinedInfo). 9033 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 9034 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 9035 "Expect a declare mapper directive"); 9036 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 9037 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 9038 } 9039 9040 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects need this treatment; anything else maps normally.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' gets its own PTR_AND_OBJ entry anchored at the
    // lambda's field for it; LambdaPointers remembers the owning lambda so
    // MEMBER_OF can be fixed up later.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the pointer value itself, zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. Entries
      // emitted by generateInfoForLambdaCaptures carry exactly this flag
      // combination, which is how they are recognized here.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda object;
      // that entry is the MEMBER_OF parent for this capture.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list from map clauses that mentions VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order lists so that 'present'-modified and 'alloc' entries come first.
    // NOTE(review): the comparator reads the modifiers from LHS but the map
    // type from RHS (and vice versa for the R-suffixed pair) — looks
    // asymmetric; confirm against upstream before touching.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare this list against every later list only (pairs are symmetric).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the base outwards while they agree
        // on both expression class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter (fully matched) list is the base; the other is the
          // overlapping sub-list recorded under it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped sub-lists by declaration order of the
    // first differing field, so codegen emits them in layout order.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) classes: whichever appears first in
            // the flattened plain layout is considered smaller.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // Firstprivate pointers are passed by the pointee's address, so load
      // through the reference first.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill the descriptors innermost-dimension-first (RevIdx reverses the
    // stored order).
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if possible.
9518 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 9519 if (!E) 9520 return nullptr; 9521 9522 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts())) 9523 if (const MemberExpr *ME = 9524 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 9525 return ME->getMemberDecl(); 9526 return nullptr; 9527 } 9528 9529 /// Emit a string constant containing the names of the values mapped to the 9530 /// offloading runtime library. 9531 llvm::Constant * 9532 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9533 MappableExprsHandler::MappingExprInfo &MapExprs) { 9534 9535 uint32_t SrcLocStrSize; 9536 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 9537 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 9538 9539 SourceLocation Loc; 9540 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 9541 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 9542 Loc = VD->getLocation(); 9543 else 9544 Loc = MapExprs.getMapExpr()->getExprLoc(); 9545 } else { 9546 Loc = MapExprs.getMapDecl()->getLocation(); 9547 } 9548 9549 std::string ExprName; 9550 if (MapExprs.getMapExpr()) { 9551 PrintingPolicy P(CGF.getContext().getLangOpts()); 9552 llvm::raw_string_ostream OS(ExprName); 9553 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9554 OS.flush(); 9555 } else { 9556 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9557 } 9558 9559 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9560 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 9561 PLoc.getLine(), PLoc.getColumn(), 9562 SrcLocStrSize); 9563 } 9564 9565 /// Emit the arrays used to pass the captures and map information to the 9566 /// offloading runtime library. If there is no map or capture information, 9567 /// return nullptr by reference. 
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries holding the base pointers, pointers and (optional)
    // mapper functions, filled element-by-element in the loop further down.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For a non-contiguous entry the "size" slot carries the dimension
          // count rather than a byte size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Populate the per-capture slots of the arrays allocated above.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr/addr capture was stored so later
      // codegen can read the translated device address back.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored dynamically when at least one of them needs
      // runtime evaluation; otherwise the constant global above is used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array recorded in Info to a pointer to its first element,
    // which is the form the runtime entry points take.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end-of-region call, use the dedicated end map-types array when
    // one was generated (present modifier stripped); fall back otherwise.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing is mapped: all array arguments are null.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): getSingleCompoundChild is a static member inherited from
  // CGOpenMPRuntime; calling it through CGOpenMPSIMDRuntime is legal but the
  // base-class spelling would be clearer — confirm intent.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // A distribute directly nested in 'target' is the one we want.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // Otherwise look one level deeper through an intervening 'teams'.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Target forms that cannot contain a nested distribute of interest.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not valid outer directives here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once per module.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Mangle the mapped type into the function name so distinct types get
  // distinct mapper functions.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // tofrom falls through ToElseBB -> FromBB check -> EndBB with the
    // unmodified MemberMapType.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization runs only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the (cached) mapper function for \p D, emitting it on first use.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // emitUserDefinedMapper populates UDMMap for D.
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // Directives carrying depend/nowait clauses must be wrapped in an outer
  // task so the offload can be deferred/asynchronous.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray/MapNamesArray are filled by TargetThenGen below
  // and consumed by ThenGen; they are captured by reference in both lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables: the fallback call runs in the task context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, captured-record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the arrays to ThenGen via the by-reference captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target entry points; reaching
    // them here is a front-end invariant violation.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if \p VD must not be emitted for the current compilation
/// side, based on its declare-target device_type (host vs. nohost).
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    // On the device side, scan the body for target regions so their device
    // functions get emitted even if this host function is not.
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables (and 'to' variables under unified shared memory) are
    // emitted lazily; remember them for emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size zero signals "no definition in this TU".
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    // The entry registered here is the pointer-sized reference, not the
    // variable itself.
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emit the declare-target variables whose emission was postponed by
  // emitTargetGlobalVariable().
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Default implementation only validates the directive kind; target-specific
  // runtimes override this to fix up lambda captures.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  // Record the effects of a 'requires' directive on later codegen.
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if
(const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Map the atomic_default_mem_order clause to the matching LLVM
      // atomic ordering used for subsequent atomic codegen.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  // Returns true (and sets \p AS) only for variables with a predefined
  // allocator in their 'omp allocate' attribute.
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // Temporarily suppress implicit declare-target marking while this RAII
  // object is alive (device compilation only).
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 11147 CGF.CGM.Int32Ty, /* isSigned = */ true) 11148 : CGF.Builder.getInt32(0); 11149 11150 llvm::Value *ThreadLimitVal = 11151 ThreadLimit 11152 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 11153 CGF.CGM.Int32Ty, /* isSigned = */ true) 11154 : CGF.Builder.getInt32(0); 11155 11156 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 11157 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 11158 ThreadLimitVal}; 11159 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11160 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 11161 PushNumTeamsArgs); 11162 } 11163 11164 void CGOpenMPRuntime::emitTargetDataCalls( 11165 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11166 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11167 if (!CGF.HaveInsertPoint()) 11168 return; 11169 11170 // Action used to replace the default codegen action and turn privatization 11171 // off. 11172 PrePostActionTy NoPrivAction; 11173 11174 // Generate the code for the opening of the data environment. Capture all the 11175 // arguments of the runtime call by reference because they are used in the 11176 // closing of the region. 11177 auto &&BeginThenGen = [this, &D, Device, &Info, 11178 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 11179 // Fill up the arrays with all the mapped variables. 11180 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11181 11182 // Get map clause information. 11183 MappableExprsHandler MEHandler(D, CGF); 11184 MEHandler.generateAllInfo(CombinedInfo); 11185 11186 // Fill up the arrays and create the arguments. 
11187 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11188 /*IsNonContiguous=*/true); 11189 11190 llvm::Value *BasePointersArrayArg = nullptr; 11191 llvm::Value *PointersArrayArg = nullptr; 11192 llvm::Value *SizesArrayArg = nullptr; 11193 llvm::Value *MapTypesArrayArg = nullptr; 11194 llvm::Value *MapNamesArrayArg = nullptr; 11195 llvm::Value *MappersArrayArg = nullptr; 11196 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11197 SizesArrayArg, MapTypesArrayArg, 11198 MapNamesArrayArg, MappersArrayArg, Info); 11199 11200 // Emit device ID if any. 11201 llvm::Value *DeviceID = nullptr; 11202 if (Device) { 11203 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11204 CGF.Int64Ty, /*isSigned=*/true); 11205 } else { 11206 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11207 } 11208 11209 // Emit the number of elements in the offloading arrays. 11210 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11211 // 11212 // Source location for the ident struct 11213 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11214 11215 llvm::Value *OffloadingArgs[] = {RTLoc, 11216 DeviceID, 11217 PointerNum, 11218 BasePointersArrayArg, 11219 PointersArrayArg, 11220 SizesArrayArg, 11221 MapTypesArrayArg, 11222 MapNamesArrayArg, 11223 MappersArrayArg}; 11224 CGF.EmitRuntimeCall( 11225 OMPBuilder.getOrCreateRuntimeFunction( 11226 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 11227 OffloadingArgs); 11228 11229 // If device pointer privatization is required, emit the body of the region 11230 // here. It will have to be duplicated: with and without privatization. 11231 if (!Info.CaptureDeviceAddrMap.empty()) 11232 CodeGen(CGF); 11233 }; 11234 11235 // Generate code for the closing of the data region. 
11236 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11237 PrePostActionTy &) { 11238 assert(Info.isValid() && "Invalid data environment closing arguments."); 11239 11240 llvm::Value *BasePointersArrayArg = nullptr; 11241 llvm::Value *PointersArrayArg = nullptr; 11242 llvm::Value *SizesArrayArg = nullptr; 11243 llvm::Value *MapTypesArrayArg = nullptr; 11244 llvm::Value *MapNamesArrayArg = nullptr; 11245 llvm::Value *MappersArrayArg = nullptr; 11246 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11247 SizesArrayArg, MapTypesArrayArg, 11248 MapNamesArrayArg, MappersArrayArg, Info, 11249 {/*ForEndCall=*/true}); 11250 11251 // Emit device ID if any. 11252 llvm::Value *DeviceID = nullptr; 11253 if (Device) { 11254 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11255 CGF.Int64Ty, /*isSigned=*/true); 11256 } else { 11257 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11258 } 11259 11260 // Emit the number of elements in the offloading arrays. 11261 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11262 11263 // Source location for the ident struct 11264 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11265 11266 llvm::Value *OffloadingArgs[] = {RTLoc, 11267 DeviceID, 11268 PointerNum, 11269 BasePointersArrayArg, 11270 PointersArrayArg, 11271 SizesArrayArg, 11272 MapTypesArrayArg, 11273 MapNamesArrayArg, 11274 MappersArrayArg}; 11275 CGF.EmitRuntimeCall( 11276 OMPBuilder.getOrCreateRuntimeFunction( 11277 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11278 OffloadingArgs); 11279 }; 11280 11281 // If we need device pointer privatization, we need to emit the body of the 11282 // region with no privatization in the 'else' branch of the conditional. 11283 // Otherwise, we don't have to do anything. 
11284 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11285 PrePostActionTy &) { 11286 if (!Info.CaptureDeviceAddrMap.empty()) { 11287 CodeGen.setAction(NoPrivAction); 11288 CodeGen(CGF); 11289 } 11290 }; 11291 11292 // We don't have to do anything to close the region if the if clause evaluates 11293 // to false. 11294 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11295 11296 if (IfCond) { 11297 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11298 } else { 11299 RegionCodeGenTy RCG(BeginThenGen); 11300 RCG(CGF); 11301 } 11302 11303 // If we don't require privatization of device pointers, we emit the body in 11304 // between the runtime calls. This avoids duplicating the body code. 11305 if (Info.CaptureDeviceAddrMap.empty()) { 11306 CodeGen.setAction(NoPrivAction); 11307 CodeGen(CGF); 11308 } 11309 11310 if (IfCond) { 11311 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11312 } else { 11313 RegionCodeGenTy RCG(EndThenGen); 11314 RCG(CGF); 11315 } 11316 } 11317 11318 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11319 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11320 const Expr *Device) { 11321 if (!CGF.HaveInsertPoint()) 11322 return; 11323 11324 assert((isa<OMPTargetEnterDataDirective>(D) || 11325 isa<OMPTargetExitDataDirective>(D) || 11326 isa<OMPTargetUpdateDirective>(D)) && 11327 "Expecting either target enter, exit data, or update directives."); 11328 11329 CodeGenFunction::OMPTargetDataInfo InputInfo; 11330 llvm::Value *MapTypesArray = nullptr; 11331 llvm::Value *MapNamesArray = nullptr; 11332 // Generate the code for the opening of the data environment. 11333 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11334 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11335 // Emit device ID if any. 
11336 llvm::Value *DeviceID = nullptr; 11337 if (Device) { 11338 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11339 CGF.Int64Ty, /*isSigned=*/true); 11340 } else { 11341 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11342 } 11343 11344 // Emit the number of elements in the offloading arrays. 11345 llvm::Constant *PointerNum = 11346 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11347 11348 // Source location for the ident struct 11349 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11350 11351 llvm::Value *OffloadingArgs[] = {RTLoc, 11352 DeviceID, 11353 PointerNum, 11354 InputInfo.BasePointersArray.getPointer(), 11355 InputInfo.PointersArray.getPointer(), 11356 InputInfo.SizesArray.getPointer(), 11357 MapTypesArray, 11358 MapNamesArray, 11359 InputInfo.MappersArray.getPointer()}; 11360 11361 // Select the right runtime function call for each standalone 11362 // directive. 11363 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11364 RuntimeFunction RTLFn; 11365 switch (D.getDirectiveKind()) { 11366 case OMPD_target_enter_data: 11367 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11368 : OMPRTL___tgt_target_data_begin_mapper; 11369 break; 11370 case OMPD_target_exit_data: 11371 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11372 : OMPRTL___tgt_target_data_end_mapper; 11373 break; 11374 case OMPD_target_update: 11375 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11376 : OMPRTL___tgt_target_data_update_mapper; 11377 break; 11378 case OMPD_parallel: 11379 case OMPD_for: 11380 case OMPD_parallel_for: 11381 case OMPD_parallel_master: 11382 case OMPD_parallel_sections: 11383 case OMPD_for_simd: 11384 case OMPD_parallel_for_simd: 11385 case OMPD_cancel: 11386 case OMPD_cancellation_point: 11387 case OMPD_ordered: 11388 case OMPD_threadprivate: 11389 case OMPD_allocate: 11390 case OMPD_task: 11391 case OMPD_simd: 11392 case OMPD_tile: 11393 case OMPD_unroll: 11394 case OMPD_sections: 11395 case OMPD_section: 11396 case OMPD_single: 11397 case OMPD_master: 11398 case OMPD_critical: 11399 case OMPD_taskyield: 11400 case OMPD_barrier: 11401 case OMPD_taskwait: 11402 case OMPD_taskgroup: 11403 case OMPD_atomic: 11404 case OMPD_flush: 11405 case OMPD_depobj: 11406 case OMPD_scan: 11407 case OMPD_teams: 11408 case OMPD_target_data: 11409 case OMPD_distribute: 11410 case OMPD_distribute_simd: 11411 case OMPD_distribute_parallel_for: 11412 case OMPD_distribute_parallel_for_simd: 11413 case OMPD_teams_distribute: 11414 case OMPD_teams_distribute_simd: 11415 case OMPD_teams_distribute_parallel_for: 11416 case OMPD_teams_distribute_parallel_for_simd: 11417 case OMPD_declare_simd: 11418 case OMPD_declare_variant: 11419 case OMPD_begin_declare_variant: 11420 case OMPD_end_declare_variant: 11421 case OMPD_declare_target: 11422 case OMPD_end_declare_target: 11423 case OMPD_declare_reduction: 11424 case OMPD_declare_mapper: 11425 case OMPD_taskloop: 11426 case OMPD_taskloop_simd: 11427 case OMPD_master_taskloop: 11428 case OMPD_master_taskloop_simd: 11429 case OMPD_parallel_master_taskloop: 11430 case OMPD_parallel_master_taskloop_simd: 11431 case OMPD_target: 11432 case OMPD_target_simd: 11433 case OMPD_target_teams_distribute: 11434 case OMPD_target_teams_distribute_simd: 11435 case OMPD_target_teams_distribute_parallel_for: 11436 case OMPD_target_teams_distribute_parallel_for_simd: 11437 case 
OMPD_target_teams: 11438 case OMPD_target_parallel: 11439 case OMPD_target_parallel_for: 11440 case OMPD_target_parallel_for_simd: 11441 case OMPD_requires: 11442 case OMPD_metadirective: 11443 case OMPD_unknown: 11444 default: 11445 llvm_unreachable("Unexpected standalone target data directive."); 11446 break; 11447 } 11448 CGF.EmitRuntimeCall( 11449 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11450 OffloadingArgs); 11451 }; 11452 11453 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11454 &MapNamesArray](CodeGenFunction &CGF, 11455 PrePostActionTy &) { 11456 // Fill up the arrays with all the mapped variables. 11457 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11458 11459 // Get map clause information. 11460 MappableExprsHandler MEHandler(D, CGF); 11461 MEHandler.generateAllInfo(CombinedInfo); 11462 11463 TargetDataInfo Info; 11464 // Fill up the arrays and create the arguments. 11465 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11466 /*IsNonContiguous=*/true); 11467 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11468 D.hasClausesOfKind<OMPNowaitClause>(); 11469 emitOffloadingArraysArgument( 11470 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11471 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11472 {/*ForEndCall=*/false}); 11473 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11474 InputInfo.BasePointersArray = 11475 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11476 InputInfo.PointersArray = 11477 Address(Info.PointersArray, CGM.getPointerAlign()); 11478 InputInfo.SizesArray = 11479 Address(Info.SizesArray, CGM.getPointerAlign()); 11480 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11481 MapTypesArray = Info.MapTypesArray; 11482 MapNamesArray = Info.MapNamesArray; 11483 if (RequiresOuterTask) 11484 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11485 else 11486 
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11487 }; 11488 11489 if (IfCond) { 11490 emitIfClause(CGF, IfCond, TargetThenGen, 11491 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11492 } else { 11493 RegionCodeGenTy ThenRCG(TargetThenGen); 11494 ThenRCG(CGF); 11495 } 11496 } 11497 11498 namespace { 11499 /// Kind of parameter in a function with 'declare simd' directive. 11500 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11501 /// Attribute set of the parameter. 11502 struct ParamAttrTy { 11503 ParamKindTy Kind = Vector; 11504 llvm::APSInt StrideOrArg; 11505 llvm::APSInt Alignment; 11506 }; 11507 } // namespace 11508 11509 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11510 ArrayRef<ParamAttrTy> ParamAttrs) { 11511 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11512 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11513 // of that clause. The VLEN value must be power of 2. 11514 // In other case the notion of the function`s "characteristic data type" (CDT) 11515 // is used to compute the vector length. 11516 // CDT is defined in the following order: 11517 // a) For non-void function, the CDT is the return type. 11518 // b) If the function has any non-uniform, non-linear parameters, then the 11519 // CDT is the type of the first such parameter. 11520 // c) If the CDT determined by a) or b) above is struct, union, or class 11521 // type which is pass-by-value (except for the type that maps to the 11522 // built-in complex data type), the characteristic data type is int. 11523 // d) If none of the above three cases is applicable, the CDT is int. 11524 // The VLEN is then determined based on the CDT and the size of vector 11525 // register of that ISA for which current vector version is generated. 
The 11526 // VLEN is computed using the formula below: 11527 // VLEN = sizeof(vector_register) / sizeof(CDT), 11528 // where vector register size specified in section 3.2.1 Registers and the 11529 // Stack Frame of original AMD64 ABI document. 11530 QualType RetType = FD->getReturnType(); 11531 if (RetType.isNull()) 11532 return 0; 11533 ASTContext &C = FD->getASTContext(); 11534 QualType CDT; 11535 if (!RetType.isNull() && !RetType->isVoidType()) { 11536 CDT = RetType; 11537 } else { 11538 unsigned Offset = 0; 11539 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11540 if (ParamAttrs[Offset].Kind == Vector) 11541 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11542 ++Offset; 11543 } 11544 if (CDT.isNull()) { 11545 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11546 if (ParamAttrs[I + Offset].Kind == Vector) { 11547 CDT = FD->getParamDecl(I)->getType(); 11548 break; 11549 } 11550 } 11551 } 11552 } 11553 if (CDT.isNull()) 11554 CDT = C.IntTy; 11555 CDT = CDT->getCanonicalTypeUnqualified(); 11556 if (CDT->isRecordType() || CDT->isUnionType()) 11557 CDT = C.IntTy; 11558 return C.getTypeSize(CDT); 11559 } 11560 11561 static void 11562 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11563 const llvm::APSInt &VLENVal, 11564 ArrayRef<ParamAttrTy> ParamAttrs, 11565 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11566 struct ISADataTy { 11567 char ISA; 11568 unsigned VecRegSize; 11569 }; 11570 ISADataTy ISAData[] = { 11571 { 11572 'b', 128 11573 }, // SSE 11574 { 11575 'c', 256 11576 }, // AVX 11577 { 11578 'd', 256 11579 }, // AVX2 11580 { 11581 'e', 512 11582 }, // AVX512 11583 }; 11584 llvm::SmallVector<char, 2> Masked; 11585 switch (State) { 11586 case OMPDeclareSimdDeclAttr::BS_Undefined: 11587 Masked.push_back('N'); 11588 Masked.push_back('M'); 11589 break; 11590 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11591 Masked.push_back('N'); 11592 break; 11593 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11594 
Masked.push_back('M'); 11595 break; 11596 } 11597 for (char Mask : Masked) { 11598 for (const ISADataTy &Data : ISAData) { 11599 SmallString<256> Buffer; 11600 llvm::raw_svector_ostream Out(Buffer); 11601 Out << "_ZGV" << Data.ISA << Mask; 11602 if (!VLENVal) { 11603 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11604 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11605 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11606 } else { 11607 Out << VLENVal; 11608 } 11609 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11610 switch (ParamAttr.Kind){ 11611 case LinearWithVarStride: 11612 Out << 's' << ParamAttr.StrideOrArg; 11613 break; 11614 case Linear: 11615 Out << 'l'; 11616 if (ParamAttr.StrideOrArg != 1) 11617 Out << ParamAttr.StrideOrArg; 11618 break; 11619 case Uniform: 11620 Out << 'u'; 11621 break; 11622 case Vector: 11623 Out << 'v'; 11624 break; 11625 } 11626 if (!!ParamAttr.Alignment) 11627 Out << 'a' << ParamAttr.Alignment; 11628 } 11629 Out << '_' << Fn->getName(); 11630 Fn->addFnAttr(Out.str()); 11631 } 11632 } 11633 } 11634 11635 // This are the Functions that are needed to mangle the name of the 11636 // vector functions generated by the compiler, according to the rules 11637 // defined in the "Vector Function ABI specifications for AArch64", 11638 // available at 11639 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11640 11641 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11642 /// 11643 /// TODO: Need to implement the behavior for reference marked with a 11644 /// var or no linear modifiers (1.b in the section). For this, we 11645 /// need to extend ParamKindTy to support the linear modifiers. 
11646 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11647 QT = QT.getCanonicalType(); 11648 11649 if (QT->isVoidType()) 11650 return false; 11651 11652 if (Kind == ParamKindTy::Uniform) 11653 return false; 11654 11655 if (Kind == ParamKindTy::Linear) 11656 return false; 11657 11658 // TODO: Handle linear references with modifiers 11659 11660 if (Kind == ParamKindTy::LinearWithVarStride) 11661 return false; 11662 11663 return true; 11664 } 11665 11666 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11667 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11668 QT = QT.getCanonicalType(); 11669 unsigned Size = C.getTypeSize(QT); 11670 11671 // Only scalars and complex within 16 bytes wide set PVB to true. 11672 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11673 return false; 11674 11675 if (QT->isFloatingType()) 11676 return true; 11677 11678 if (QT->isIntegerType()) 11679 return true; 11680 11681 if (QT->isPointerType()) 11682 return true; 11683 11684 // TODO: Add support for complex types (section 3.1.2, item 2). 11685 11686 return false; 11687 } 11688 11689 /// Computes the lane size (LS) of a return type or of an input parameter, 11690 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11691 /// TODO: Add support for references, section 3.2.1, item 1. 11692 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11693 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11694 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11695 if (getAArch64PBV(PTy, C)) 11696 return C.getTypeSize(PTy); 11697 } 11698 if (getAArch64PBV(QT, C)) 11699 return C.getTypeSize(QT); 11700 11701 return C.getTypeSize(C.getUIntPtrType()); 11702 } 11703 11704 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11705 // signature of the scalar function, as defined in 3.2.2 of the 11706 // AAVFABI. 
/// Computes (NDS, WDS, OutputBecomesInput) for \p FD: the narrowest and
/// widest lane sizes over the return type and all parameters, plus whether
/// the (non-PBV, MTV) return value must also be counted as an input.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      // 'ls<pos>': stride is taken from the parameter at position <pos>.
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    // 'a<align>' suffix for explicitly aligned parameters.
    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  // Name shape: <prefix><isa><mask><vlen>[v]<params>_<scalar-name>.
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
11793 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11794 StringRef Prefix, char ISA, 11795 StringRef ParSeq, StringRef MangledName, 11796 bool OutputBecomesInput, 11797 llvm::Function *Fn) { 11798 switch (NDS) { 11799 case 8: 11800 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11801 OutputBecomesInput, Fn); 11802 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11803 OutputBecomesInput, Fn); 11804 break; 11805 case 16: 11806 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11809 OutputBecomesInput, Fn); 11810 break; 11811 case 32: 11812 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11813 OutputBecomesInput, Fn); 11814 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11815 OutputBecomesInput, Fn); 11816 break; 11817 case 64: 11818 case 128: 11819 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11820 OutputBecomesInput, Fn); 11821 break; 11822 default: 11823 llvm_unreachable("Scalar type is too wide."); 11824 } 11825 } 11826 11827 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11828 static void emitAArch64DeclareSimdFunction( 11829 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11830 ArrayRef<ParamAttrTy> ParamAttrs, 11831 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11832 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11833 11834 // Get basic data for building the vector signature. 11835 const auto Data = getNDSWDS(FD, ParamAttrs); 11836 const unsigned NDS = std::get<0>(Data); 11837 const unsigned WDS = std::get<1>(Data); 11838 const bool OutputBecomesInput = std::get<2>(Data); 11839 11840 // Check the values provided via `simdlen` by the user. 11841 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11842 if (UserVLEN == 1) { 11843 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11844 DiagnosticsEngine::Warning, 11845 "The clause simdlen(1) has no effect when targeting aarch64."); 11846 CGM.getDiags().Report(SLoc, DiagID); 11847 return; 11848 } 11849 11850 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11851 // Advanced SIMD output. 11852 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11853 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11854 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11855 "power of 2 when targeting Advanced SIMD."); 11856 CGM.getDiags().Report(SLoc, DiagID); 11857 return; 11858 } 11859 11860 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11861 // limits. 11862 if (ISA == 's' && UserVLEN != 0) { 11863 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11864 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11865 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11866 "lanes in the architectural constraints " 11867 "for SVE (min is 128-bit, max is " 11868 "2048-bit, by steps of 128-bit)"); 11869 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11870 return; 11871 } 11872 } 11873 11874 // Sort out parameter sequence. 11875 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11876 StringRef Prefix = "_ZGV"; 11877 // Generate simdlen from user input (if any). 11878 if (UserVLEN) { 11879 if (ISA == 's') { 11880 // SVE generates only a masked function. 11881 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11882 OutputBecomesInput, Fn); 11883 } else { 11884 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11885 // Advanced SIMD generates one or two functions, depending on 11886 // the `[not]inbranch` clause. 
11887 switch (State) { 11888 case OMPDeclareSimdDeclAttr::BS_Undefined: 11889 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11890 OutputBecomesInput, Fn); 11891 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11892 OutputBecomesInput, Fn); 11893 break; 11894 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11895 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11896 OutputBecomesInput, Fn); 11897 break; 11898 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11899 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11900 OutputBecomesInput, Fn); 11901 break; 11902 } 11903 } 11904 } else { 11905 // If no user simdlen is provided, follow the AAVFABI rules for 11906 // generating the vector length. 11907 if (ISA == 's') { 11908 // SVE, section 3.4.1, item 1. 11909 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11910 OutputBecomesInput, Fn); 11911 } else { 11912 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11913 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11914 // two vector names depending on the use of the clause 11915 // `[not]inbranch`. 
11916 switch (State) { 11917 case OMPDeclareSimdDeclAttr::BS_Undefined: 11918 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11919 OutputBecomesInput, Fn); 11920 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11921 OutputBecomesInput, Fn); 11922 break; 11923 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11924 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11925 OutputBecomesInput, Fn); 11926 break; 11927 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11928 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11929 OutputBecomesInput, Fn); 11930 break; 11931 } 11932 } 11933 } 11934 } 11935 11936 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11937 llvm::Function *Fn) { 11938 ASTContext &C = CGM.getContext(); 11939 FD = FD->getMostRecentDecl(); 11940 // Map params to their positions in function decl. 11941 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11942 if (isa<CXXMethodDecl>(FD)) 11943 ParamPositions.try_emplace(FD, 0); 11944 unsigned ParamPos = ParamPositions.size(); 11945 for (const ParmVarDecl *P : FD->parameters()) { 11946 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11947 ++ParamPos; 11948 } 11949 while (FD) { 11950 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11951 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11952 // Mark uniform parameters. 11953 for (const Expr *E : Attr->uniforms()) { 11954 E = E->IgnoreParenImpCasts(); 11955 unsigned Pos; 11956 if (isa<CXXThisExpr>(E)) { 11957 Pos = ParamPositions[FD]; 11958 } else { 11959 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11960 ->getCanonicalDecl(); 11961 Pos = ParamPositions[PVD]; 11962 } 11963 ParamAttrs[Pos].Kind = Uniform; 11964 } 11965 // Get alignment info. 
11966 auto NI = Attr->alignments_begin(); 11967 for (const Expr *E : Attr->aligneds()) { 11968 E = E->IgnoreParenImpCasts(); 11969 unsigned Pos; 11970 QualType ParmTy; 11971 if (isa<CXXThisExpr>(E)) { 11972 Pos = ParamPositions[FD]; 11973 ParmTy = E->getType(); 11974 } else { 11975 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11976 ->getCanonicalDecl(); 11977 Pos = ParamPositions[PVD]; 11978 ParmTy = PVD->getType(); 11979 } 11980 ParamAttrs[Pos].Alignment = 11981 (*NI) 11982 ? (*NI)->EvaluateKnownConstInt(C) 11983 : llvm::APSInt::getUnsigned( 11984 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11985 .getQuantity()); 11986 ++NI; 11987 } 11988 // Mark linear parameters. 11989 auto SI = Attr->steps_begin(); 11990 auto MI = Attr->modifiers_begin(); 11991 for (const Expr *E : Attr->linears()) { 11992 E = E->IgnoreParenImpCasts(); 11993 unsigned Pos; 11994 // Rescaling factor needed to compute the linear parameter 11995 // value in the mangled name. 11996 unsigned PtrRescalingFactor = 1; 11997 if (isa<CXXThisExpr>(E)) { 11998 Pos = ParamPositions[FD]; 11999 } else { 12000 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 12001 ->getCanonicalDecl(); 12002 Pos = ParamPositions[PVD]; 12003 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 12004 PtrRescalingFactor = CGM.getContext() 12005 .getTypeSizeInChars(P->getPointeeType()) 12006 .getQuantity(); 12007 } 12008 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12009 ParamAttr.Kind = Linear; 12010 // Assuming a stride of 1, for `linear` without modifiers. 
12011 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12012 if (*SI) { 12013 Expr::EvalResult Result; 12014 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12015 if (const auto *DRE = 12016 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12017 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 12018 ParamAttr.Kind = LinearWithVarStride; 12019 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12020 ParamPositions[StridePVD->getCanonicalDecl()]); 12021 } 12022 } 12023 } else { 12024 ParamAttr.StrideOrArg = Result.Val.getInt(); 12025 } 12026 } 12027 // If we are using a linear clause on a pointer, we need to 12028 // rescale the value of linear_step with the byte size of the 12029 // pointee type. 12030 if (Linear == ParamAttr.Kind) 12031 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12032 ++SI; 12033 ++MI; 12034 } 12035 llvm::APSInt VLENVal; 12036 SourceLocation ExprLoc; 12037 const Expr *VLENExpr = Attr->getSimdlen(); 12038 if (VLENExpr) { 12039 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12040 ExprLoc = VLENExpr->getExprLoc(); 12041 } 12042 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12043 if (CGM.getTriple().isX86()) { 12044 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12045 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12046 unsigned VLEN = VLENVal.getExtValue(); 12047 StringRef MangledName = Fn->getName(); 12048 if (CGM.getTarget().hasFeature("sve")) 12049 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12050 MangledName, 's', 128, Fn, ExprLoc); 12051 if (CGM.getTarget().hasFeature("neon")) 12052 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12053 MangledName, 'n', 128, Fn, ExprLoc); 12054 } 12055 } 12056 FD = FD->getPreviousDecl(); 12057 } 12058 } 12059 12060 namespace { 12061 /// Cleanup action for doacross support. 
// Cleanup that runs __kmpc_doacross_fini (captured as RTLFn with its two
// pre-built arguments) on both normal and EH exits from the doacross region.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Skip if codegen already terminated this block.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

// Emit the doacross (ordered-depend) loop setup: build an array of kmp_dim
// descriptors (lo/up/st as kmp_int64, one per collapsed loop), call
// __kmpc_doacross_init, and push a cleanup that calls __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // kmp_dim is built once and cached in KmpDimTy for reuse.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    // kmp_int64 lo; // lower
    // kmp_int64 up; // upper
    // kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // dims[] starts zero-initialized, so 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Matching __kmpc_doacross_fini is emitted via the cleanup stack so it also
  // runs on exceptional exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

// Emit an ordered depend(source)/depend(sink) construct: store the current
// loop counters into a temp array and call the matching doacross runtime
// entry (continued below).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's dependence vector element, widened to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // depend(source) posts completion; depend(sink) waits on it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

// Emit a call to \p Callee at \p Loc with an artificial debug location.
// Uses the cheaper nounwind call form when the callee is a known function
// marked as non-throwing.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

// Default lowering of a call to an outlined OpenMP region: plain emitCall.
// Target-specific runtimes may override this.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

// Record that a declare-target function body is being emitted; only flips the
// HasEmittedDeclareTargetRegion flag.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

// Host-side default: native and target parameters share storage, so just
// return the native parameter's address (TargetParam is unused here).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

// Return the address to use for local variable \p VD. For variables in untied
// tasks the stashed addresses are used; for variables with an
// 'omp allocate' attribute, storage is obtained from __kmpc_alloc /
// __kmpc_aligned_alloc and released via a pushed __kmpc_free cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up addresses recorded for this function's untied task, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is dynamic, round it up to the declared alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // An explicit 'align' modifier selects __kmpc_aligned_alloc below.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, publish the allocated pointer into the stashed slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as its raw encoding so the cleanup stays POD.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

// True if \p VD has a stashed address in the untied task associated with the
// function being emitted.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

// RAII: while alive, records the decls named in 'nontemporal' clauses of \p S
// on NontemporalDeclsStack; the destructor pops them.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise it must be a member of the current class ('this->x').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

// RAII: while alive, publishes \p LocalVars as the untied-task-local address
// map for the function being emitted; the destructor pops it.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

// True if \p VD is named in any active 'nontemporal' clause scope.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

// Collect decls for which lastprivate-conditional analysis must be disabled
// inside directive \p S: variables captured by target/task regions and those
// privatized by private/firstprivate/lastprivate/reduction/linear clauses.
// Any of them already tracked (and not yet disabled) on the enclosing
// LastprivateConditionalStack is added to \p NeedToAddForLPCsAsDisabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Report only decls tracked by the innermost enclosing (enabled) LPC frame.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

// RAII (push form): if \p S has any lastprivate(conditional:) clause under
// OpenMP >= 5.0, push a frame mapping each such decl to a unique global name
// and remember the loop IV lvalue and current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

// RAII (disable form): push a frame marked Disabled for decls whose inner
// analysis must be suppressed within \p S (see tryToDisableInnerAnalysis).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled frames carry empty unique names; only membership matters.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

// Named factory for the disable form of the RAII above.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

// Pop whichever frame (disabled or enabled) this RAII pushed, verifying the
// stack top matches the action taken at construction.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

// Create (or reuse) the {value, Fired-flag} wrapper struct for lastprivate
// conditional variable \p VD in the current function, reset its Fired flag to
// 0, and return the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // NOTE(review): record name "lasprivate.conditional" has a typo
    // ("lasprivate"); kept verbatim since it is an emitted identifier.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search frames innermost-first; a Disabled frame suppresses the match.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children, skipping non-glvalue expressions.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

// Emit the "keep the value from the highest iteration" update for a
// lastprivate conditional variable, guarded by a critical region (continued
// below).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Signedness of the comparison follows the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

// If \p LHS references a tracked lastprivate conditional variable, emit the
// corresponding update (same function) or set the Fired flag (inner parallel
// region; continued below).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct to reach
    // the Fired flag that lives right behind the value.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic (unordered, volatile) store: other threads of the inner region
    // may set the flag concurrently.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

// For each lastprivate conditional variable captured by directive \p D (and
// not in \p IgnoredDecls), test its Fired flag and, when set, emit the final
// update. (Definition continues beyond this chunk.)
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Only the innermost enabled frame, and only if it belongs to this function.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    // NOTE(review): "rehistered" typo in the assertion message, kept verbatim.
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
12831 Address Addr = CGF.GetAddrOfLocalVar(VD); 12832 LValue LVal; 12833 if (VD->getType()->isReferenceType()) 12834 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12835 AlignmentSource::Decl); 12836 else 12837 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12838 AlignmentSource::Decl); 12839 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12840 D.getBeginLoc()); 12841 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12842 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12843 // } 12844 } 12845 } 12846 12847 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12848 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12849 SourceLocation Loc) { 12850 if (CGF.getLangOpts().OpenMP < 50) 12851 return; 12852 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12853 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12854 "Unknown lastprivate conditional variable."); 12855 StringRef UniqueName = It->second; 12856 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12857 // The variable was not updated in the region - exit. 
12858 if (!GV) 12859 return; 12860 LValue LPLVal = CGF.MakeAddrLValue( 12861 Address(GV, GV->getValueType(), PrivLVal.getAlignment()), 12862 PrivLVal.getType().getNonReferenceType()); 12863 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12864 CGF.EmitStoreOfScalar(Res, PrivLVal); 12865 } 12866 12867 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12868 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12869 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12870 llvm_unreachable("Not supported in SIMD-only mode"); 12871 } 12872 12873 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12874 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12875 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12876 llvm_unreachable("Not supported in SIMD-only mode"); 12877 } 12878 12879 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12880 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12881 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12882 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12883 bool Tied, unsigned &NumberOfParts) { 12884 llvm_unreachable("Not supported in SIMD-only mode"); 12885 } 12886 12887 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12888 SourceLocation Loc, 12889 llvm::Function *OutlinedFn, 12890 ArrayRef<llvm::Value *> CapturedVars, 12891 const Expr *IfCond, 12892 llvm::Value *NumThreads) { 12893 llvm_unreachable("Not supported in SIMD-only mode"); 12894 } 12895 12896 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12897 CodeGenFunction &CGF, StringRef CriticalName, 12898 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12899 const Expr *Hint) { 12900 llvm_unreachable("Not supported in SIMD-only mode"); 12901 } 12902 12903 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12904 const RegionCodeGenTy &MasterOpGen, 12905 SourceLocation Loc) { 12906 
llvm_unreachable("Not supported in SIMD-only mode"); 12907 } 12908 12909 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12910 const RegionCodeGenTy &MasterOpGen, 12911 SourceLocation Loc, 12912 const Expr *Filter) { 12913 llvm_unreachable("Not supported in SIMD-only mode"); 12914 } 12915 12916 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12917 SourceLocation Loc) { 12918 llvm_unreachable("Not supported in SIMD-only mode"); 12919 } 12920 12921 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12922 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12923 SourceLocation Loc) { 12924 llvm_unreachable("Not supported in SIMD-only mode"); 12925 } 12926 12927 void CGOpenMPSIMDRuntime::emitSingleRegion( 12928 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12929 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12930 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12931 ArrayRef<const Expr *> AssignmentOps) { 12932 llvm_unreachable("Not supported in SIMD-only mode"); 12933 } 12934 12935 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12936 const RegionCodeGenTy &OrderedOpGen, 12937 SourceLocation Loc, 12938 bool IsThreads) { 12939 llvm_unreachable("Not supported in SIMD-only mode"); 12940 } 12941 12942 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12943 SourceLocation Loc, 12944 OpenMPDirectiveKind Kind, 12945 bool EmitChecks, 12946 bool ForceSimpleCall) { 12947 llvm_unreachable("Not supported in SIMD-only mode"); 12948 } 12949 12950 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12951 CodeGenFunction &CGF, SourceLocation Loc, 12952 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12953 bool Ordered, const DispatchRTInput &DispatchValues) { 12954 llvm_unreachable("Not supported in SIMD-only mode"); 12955 } 12956 12957 void CGOpenMPSIMDRuntime::emitForStaticInit( 12958 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind 
DKind, 12959 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12960 llvm_unreachable("Not supported in SIMD-only mode"); 12961 } 12962 12963 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12964 CodeGenFunction &CGF, SourceLocation Loc, 12965 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12966 llvm_unreachable("Not supported in SIMD-only mode"); 12967 } 12968 12969 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12970 SourceLocation Loc, 12971 unsigned IVSize, 12972 bool IVSigned) { 12973 llvm_unreachable("Not supported in SIMD-only mode"); 12974 } 12975 12976 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12977 SourceLocation Loc, 12978 OpenMPDirectiveKind DKind) { 12979 llvm_unreachable("Not supported in SIMD-only mode"); 12980 } 12981 12982 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12983 SourceLocation Loc, 12984 unsigned IVSize, bool IVSigned, 12985 Address IL, Address LB, 12986 Address UB, Address ST) { 12987 llvm_unreachable("Not supported in SIMD-only mode"); 12988 } 12989 12990 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12991 llvm::Value *NumThreads, 12992 SourceLocation Loc) { 12993 llvm_unreachable("Not supported in SIMD-only mode"); 12994 } 12995 12996 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12997 ProcBindKind ProcBind, 12998 SourceLocation Loc) { 12999 llvm_unreachable("Not supported in SIMD-only mode"); 13000 } 13001 13002 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 13003 const VarDecl *VD, 13004 Address VDAddr, 13005 SourceLocation Loc) { 13006 llvm_unreachable("Not supported in SIMD-only mode"); 13007 } 13008 13009 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 13010 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 13011 CodeGenFunction *CGF) { 13012 llvm_unreachable("Not supported in SIMD-only mode"); 
13013 } 13014 13015 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 13016 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 13017 llvm_unreachable("Not supported in SIMD-only mode"); 13018 } 13019 13020 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 13021 ArrayRef<const Expr *> Vars, 13022 SourceLocation Loc, 13023 llvm::AtomicOrdering AO) { 13024 llvm_unreachable("Not supported in SIMD-only mode"); 13025 } 13026 13027 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 13028 const OMPExecutableDirective &D, 13029 llvm::Function *TaskFunction, 13030 QualType SharedsTy, Address Shareds, 13031 const Expr *IfCond, 13032 const OMPTaskDataTy &Data) { 13033 llvm_unreachable("Not supported in SIMD-only mode"); 13034 } 13035 13036 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 13037 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 13038 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 13039 const Expr *IfCond, const OMPTaskDataTy &Data) { 13040 llvm_unreachable("Not supported in SIMD-only mode"); 13041 } 13042 13043 void CGOpenMPSIMDRuntime::emitReduction( 13044 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 13045 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 13046 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 13047 assert(Options.SimpleReduction && "Only simple reduction is expected."); 13048 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 13049 ReductionOps, Options); 13050 } 13051 13052 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 13053 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 13054 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 13055 llvm_unreachable("Not supported in SIMD-only mode"); 13056 } 13057 13058 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 13059 SourceLocation Loc, 13060 bool 
IsWorksharingReduction) { 13061 llvm_unreachable("Not supported in SIMD-only mode"); 13062 } 13063 13064 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 13065 SourceLocation Loc, 13066 ReductionCodeGen &RCG, 13067 unsigned N) { 13068 llvm_unreachable("Not supported in SIMD-only mode"); 13069 } 13070 13071 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 13072 SourceLocation Loc, 13073 llvm::Value *ReductionsPtr, 13074 LValue SharedLVal) { 13075 llvm_unreachable("Not supported in SIMD-only mode"); 13076 } 13077 13078 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 13079 SourceLocation Loc, 13080 const OMPTaskDataTy &Data) { 13081 llvm_unreachable("Not supported in SIMD-only mode"); 13082 } 13083 13084 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 13085 CodeGenFunction &CGF, SourceLocation Loc, 13086 OpenMPDirectiveKind CancelRegion) { 13087 llvm_unreachable("Not supported in SIMD-only mode"); 13088 } 13089 13090 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 13091 SourceLocation Loc, const Expr *IfCond, 13092 OpenMPDirectiveKind CancelRegion) { 13093 llvm_unreachable("Not supported in SIMD-only mode"); 13094 } 13095 13096 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 13097 const OMPExecutableDirective &D, StringRef ParentName, 13098 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 13099 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 13100 llvm_unreachable("Not supported in SIMD-only mode"); 13101 } 13102 13103 void CGOpenMPSIMDRuntime::emitTargetCall( 13104 CodeGenFunction &CGF, const OMPExecutableDirective &D, 13105 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 13106 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 13107 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 13108 const OMPLoopDirective &D)> 13109 SizeEmitter) { 13110 llvm_unreachable("Not supported in SIMD-only mode"); 13111 } 13112 
// Remaining CGOpenMPSIMDRuntime overrides: target/teams/doacross entry points
// are unsupported in SIMD-only mode and trap via llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Unlike the other target hooks this one is reachable: it simply reports
  // false, i.e. this runtime did not handle the global (presumably leaving
  // it to normal codegen - confirm against the base class contract).
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
