//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback, the directive kind
/// and whether the region has a cancel construct. Derived classes supply the
/// thread id variable.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info that is bound to the captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The base
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches any captured statement info whose kind
  /// is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region (outlined parallel/task/target or inlined).
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind associated with the region.
  OpenMPDirectiveKind Kind;
  /// Value of the HasCancel constructor argument.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
/// Regions of this class are tagged with the ParallelOutlinedRegion kind.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter holding the global thread id;
  /// must not be null (asserted).
  /// \param HelperName Name returned by getHelperName().
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind is
  /// ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper. Held as a StringRef, i.e. the characters are
  /// not copied here.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switching points of an untied task.
  /// All emission is skipped when the task is tied.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (negation of the 'Tied' ctor argument).
    bool Untied;
    /// Variable of pointer type; the part id is loaded and stored through it.
    const VarDecl *PartIDVar;
    /// Code generation callback invoked at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created in Enter() and null before that.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// Emits the dispatch switch on the loaded part id, registers the block
    /// that follows as case 0 and emits the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The default destination of the switch leaves the function through
        // the return block (running cleanups on the way).
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emits one switching point: stores the current number of switch cases as
    /// the part id, runs UntiedCodeGen, branches to the return block and adds
    /// a new switch case that continues right after this point.
    /// Dereferences UntiedSwitch, so Enter() must have created it first.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // The case value is the same number that was stored as the part id
        // above, because no case has been added in between.
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Returns the number of cases in the switch. Dereferences UntiedSwitch
    /// unconditionally, so it is only valid after Enter() created the switch.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param ThreadIDVar Variable or parameter holding the global thread id;
  /// must not be null (asserted).
  /// \param Action Untied-task action; stored by reference, not copied.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forwards to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind is
  /// TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks. Held by reference, so the
  /// referenced action has to stay alive while this object is used.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing OpenMP region
/// info, if there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured statement info that was active before this region;
  /// may be null or a non-OpenMP info, in which case OuterRegionInfo is null.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Set the value of the context parameter on the outer region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Get the field of the outer region that captures 'this', or null if there
  /// is no outer OpenMP region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: the local variable below shadows the OuterRegionInfo member. It
    // is taken from getOldCSI(), so the name is forwarded from any previous
    // captured statement info, not only from an OpenMP one.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forwards to the outer OpenMP region, if any; otherwise does nothing.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Get the captured statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind is
  /// InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI viewed as an OpenMP region info; null if OldCSI is null or is not
  /// an OpenMP region info.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind is
  /// TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided name of the capture helper (not copied).
  StringRef HelperName;
};

/// Placeholder code generation callback for expression-only regions; it must
/// never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      // Only captures of variables (by reference or by copy) are relevant.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Local variables and parameters are skipped.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the original variable and register its address
      // as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Returns exactly what the base class lookup returns (null if not found).
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// LLVM-style RTTI support: never matches, so isa/dyn_cast to this class
  /// always fail.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
///
/// The constructor installs a new CGOpenMPInlinedRegionInfo as the current
/// captured statement info of the function; the destructor deletes it and
/// restores the previous one. With NoInheritance set, the lambda capture
/// fields, the lambda 'this' capture field and the block info of the function
/// are cleared for the lifetime of the object and restored afterwards.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved CGF.LambdaCaptureFields (only used if NoInheritance is set).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved CGF.LambdaThisCaptureField (only used if NoInheritance is set).
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved CGF.BlockInfo (only used if NoInheritance is set).
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true (the default), the lambda capture state and
  /// block info of \p CGF are hidden while this object is alive.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Swapping with the (empty) member both saves the current map and
      // leaves CGF with an empty one.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is run by this cleanup (not owned).
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Runs Action->Exit(), unless the function has no insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the stored callback inside a fresh cleanups scope. If a pre/post
/// action is attached, a cleanup is pushed first so that the action's Exit()
/// hook runs when the scope is left; otherwise a default-constructed action
/// is passed to the callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
///
/// The expected shape is a call whose callee is an opaque value expression
/// whose source expression (ignoring implicit casts) refers to an
/// OMPDeclareReductionDecl.
/// \return The referenced declaration, or nullptr if \p ReductionOp does not
/// have that shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of a private reduction item described by a
/// user-defined reduction.
///
/// If \p DRD has an initializer, \p InitOp is emitted with its two arguments
/// remapped to \p Private and \p Original and its callee mapped to the second
/// function of the pair returned by getUserDefinedReduction(). Otherwise the
/// private item is initialized from a null constant of type \p Ty.
/// \param DRD User-defined reduction declaration; dereferenced unconditionally.
/// \param InitOp Call expression of the form described for getReductionInit();
/// only used when \p DRD has an initializer.
/// \param Private Address of the private copy to initialize.
/// \param Original Address of the original item.
/// \param Ty Type of the item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Both arguments are expected to be unary operators applied to references
    // to variables (the casts below assert this).
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the variable of the first argument to the private copy and the
    // variable of the second argument to the original item.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the pair and emit the
    // call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a private constant global holding the null
    // value of the type and copy it into the private item.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global's lvalue; this case
      // returns instead of falling through to the rvalue path below.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar and complex values are stored through an opaque rvalue.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the declare reduction initializer path; otherwise \p Init is emitted into
/// each element.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration, may be null. The source array is
/// only traversed when it is non-null.
/// \param SrcAddr Address of the original array.
678 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 679 QualType Type, bool EmitDeclareReductionInit, 680 const Expr *Init, 681 const OMPDeclareReductionDecl *DRD, 682 Address SrcAddr = Address::invalid()) { 683 // Perform element-by-element initialization. 684 QualType ElementTy; 685 686 // Drill down to the base element type on both arrays. 687 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 688 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 689 DestAddr = 690 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 746 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 747 } 748 749 // Shift the address forward by one element. 750 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 751 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 752 // Check whether we've reached the end. 753 llvm::Value *Done = 754 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 755 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 756 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 757 758 // Done. 
759 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 760 } 761 762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 763 return CGF.EmitOMPSharedLValue(E); 764 } 765 766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 767 const Expr *E) { 768 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 769 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 770 return LValue(); 771 } 772 773 void ReductionCodeGen::emitAggregateInitialization( 774 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 775 const OMPDeclareReductionDecl *DRD) { 776 // Emit VarDecl with copy init for arrays. 777 // Get the address of the original variable captured in current 778 // captured region. 779 const auto *PrivateVD = 780 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 781 bool EmitDeclareReductionInit = 782 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 783 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 784 EmitDeclareReductionInit, 785 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 786 : PrivateVD->getInit(), 787 DRD, SharedLVal.getAddress(CGF)); 788 } 789 790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 791 ArrayRef<const Expr *> Origs, 792 ArrayRef<const Expr *> Privates, 793 ArrayRef<const Expr *> ReductionOps) { 794 ClausesData.reserve(Shareds.size()); 795 SharedAddresses.reserve(Shareds.size()); 796 Sizes.reserve(Shareds.size()); 797 BaseDecls.reserve(Shareds.size()); 798 const auto *IOrig = Origs.begin(); 799 const auto *IPriv = Privates.begin(); 800 const auto *IRed = ReductionOps.begin(); 801 for (const Expr *Ref : Shareds) { 802 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 803 std::advance(IOrig, 1); 804 std::advance(IPriv, 1); 805 std::advance(IRed, 1); 806 } 807 } 808 809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 810 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 811 "Number of generated lvalues must be exactly N."); 812 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 813 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 814 SharedAddresses.emplace_back(First, Second); 815 if (ClausesData[N].Shared == ClausesData[N].Ref) { 816 OrigAddresses.emplace_back(First, Second); 817 } else { 818 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 819 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 820 OrigAddresses.emplace_back(First, Second); 821 } 822 } 823 824 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 825 const auto *PrivateVD = 826 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 827 QualType PrivateType = PrivateVD->getType(); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value 
*SizeInChars;
  // Element type is taken from the LLVM pointer type of the item's first
  // original address.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (second - first) + 1, and the byte
    // size is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise start from the byte size of the type and derive the element
    // count by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private variable array type to the
  // computed element count while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of the N-th reduction item using
/// an already known element count \p Size.
///
/// For items whose private type is not variably modified nothing is emitted,
/// and \p Size (as well as the recorded element count) is expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the array type's opaque size expression to Size for the duration of
  // the type emission.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the initialization of the private copy of the N-th reduction item.
/// \p DefaultInit is a caller-supplied callback whose boolean result is
/// consulted only on the last fallback path.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 886 const OMPDeclareReductionDecl *DRD = 887 getReductionInit(ClausesData[N].ReductionOp); 888 QualType PrivateType = PrivateVD->getType(); 889 PrivateAddr = CGF.Builder.CreateElementBitCast( 890 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 891 QualType SharedType = SharedAddresses[N].first.getType(); 892 SharedLVal = CGF.MakeAddrLValue( 893 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 894 CGF.ConvertTypeForMem(SharedType)), 895 SharedType, SharedAddresses[N].first.getBaseInfo(), 896 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 897 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 898 if (DRD && DRD->getInitializer()) 899 (void)DefaultInit(CGF); 900 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 901 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 902 (void)DefaultInit(CGF); 903 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 904 PrivateAddr, SharedLVal.getAddress(CGF), 905 SharedLVal.getType()); 906 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 907 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 908 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 909 PrivateVD->getType().getQualifiers(), 910 /*IsInitializer=*/false); 911 } 912 } 913 914 bool ReductionCodeGen::needCleanups(unsigned N) { 915 const auto *PrivateVD = 916 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 917 QualType PrivateType = PrivateVD->getType(); 918 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 919 return DTorKind != QualType::DK_none; 920 } 921 922 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 923 Address PrivateAddr) { 924 const auto *PrivateVD = 925 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 926 QualType PrivateType = PrivateVD->getType(); 927 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType();
  if (needCleanups(N)) {
    // Retype the address to the private item's memory type before
    // registering the destructor.
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through pointer and
/// reference levels until the type is neither a pointer nor a reference, or
/// matches \p ElTy. The resulting address is returned as an lvalue whose
/// element type is the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr so that it can be used where a value of type \p BaseTy is
/// expected.
///
/// For every pointer/reference level of \p BaseTy (until \p ElTy is reached)
/// a memory temporary is created and chained to the previous one. \p Addr is
/// stored into the innermost temporary and the outermost one is returned. If
/// no level needs peeling, \p Addr is cast to \p BaseLVType and returned with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // temporary created one level up
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the chain, or remember it as the head.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
Address(Addr, BaseLVAlignment); 981 } 982 983 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 984 const VarDecl *OrigVD = nullptr; 985 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 986 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 987 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 988 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 989 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 990 Base = TempASE->getBase()->IgnoreParenImpCasts(); 991 DE = cast<DeclRefExpr>(Base); 992 OrigVD = cast<VarDecl>(DE->getDecl()); 993 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 994 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 995 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 996 Base = TempASE->getBase()->IgnoreParenImpCasts(); 997 DE = cast<DeclRefExpr>(Base); 998 OrigVD = cast<VarDecl>(DE->getDecl()); 999 } 1000 return OrigVD; 1001 } 1002 1003 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1004 Address PrivateAddr) { 1005 const DeclRefExpr *DE; 1006 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1007 BaseDecls.emplace_back(OrigVD); 1008 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1009 LValue BaseLValue = 1010 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue); 1012 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1013 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1014 llvm::Value *PrivatePointer = 1015 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1016 PrivateAddr.getPointer(), 1017 SharedAddresses[N].first.getAddress(CGF).getType()); 1018 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1019 return castToBase(CGF, OrigVD->getType(), 1020 SharedAddresses[N].first.getType(), 1021 OriginalBaseLValue.getAddress(CGF).getType(), 1022 OriginalBaseLValue.getAlignment(), Ptr); 1023 } 
1024 BaseDecls.emplace_back( 1025 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1026 return PrivateAddr; 1027 } 1028 1029 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1030 const OMPDeclareReductionDecl *DRD = 1031 getReductionInit(ClausesData[N].ReductionOp); 1032 return DRD && DRD->getInitializer(); 1033 } 1034 1035 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1036 return CGF.EmitLoadOfPointerLValue( 1037 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1038 getThreadIDVariable()->getType()->castAs<PointerType>()); 1039 } 1040 1041 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1042 if (!CGF.HaveInsertPoint()) 1043 return; 1044 // 1.2.2 OpenMP Language Terminology 1045 // Structured block - An executable statement with a single entry at the 1046 // top and a single exit at the bottom. 1047 // The point of exit cannot be a branch out of the structured block. 1048 // longjmp() and throw() must not violate the entry/exit criteria. 
CGF.EHStack.pushTerminate();
  // The profile counter is bumped only when a statement was supplied.
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Return an lvalue for the thread id variable itself. Unlike the base class
/// version no pointer is loaded here: the variable's own address and type are
/// used directly.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable field of type \p FieldTy without a
/// bit-width or in-class initializer, add it to \p DC and return it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Set up the runtime for module \p CGM. \p FirstSeparator and \p Separator
/// are the strings getName() places before the first and before every later
/// name part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-section name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Drop the internal variables and erase unused declarations recorded in
/// EmittedNonTargetVariables.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
1088 for (const auto &Data : EmittedNonTargetVariables) { 1089 if (!Data.getValue().pointsToAliveValue()) 1090 continue; 1091 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1092 if (!GV) 1093 continue; 1094 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1095 continue; 1096 GV->eraseFromParent(); 1097 } 1098 } 1099 1100 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1101 SmallString<128> Buffer; 1102 llvm::raw_svector_ostream OS(Buffer); 1103 StringRef Sep = FirstSeparator; 1104 for (StringRef Part : Parts) { 1105 OS << Sep << Part; 1106 Sep = Separator; 1107 } 1108 return std::string(OS.str()); 1109 } 1110 1111 static llvm::Function * 1112 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1113 const Expr *CombinerInitializer, const VarDecl *In, 1114 const VarDecl *Out, bool IsCombiner) { 1115 // void .omp_combiner.(Ty *in, Ty *out); 1116 ASTContext &C = CGM.getContext(); 1117 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1118 FunctionArgList Args; 1119 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1120 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1121 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1122 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1123 Args.push_back(&OmpOutParm); 1124 Args.push_back(&OmpInParm); 1125 const CGFunctionInfo &FnInfo = 1126 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1127 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1128 std::string Name = CGM.getOpenMPRuntime().getName( 1129 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1130 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1131 Name, &CGM.getModule()); 1132 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1133 if (CGM.getLangOpts().Optimize) { 1134 Fn->removeFnAttr(llvm::Attribute::NoInline); 1135 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1136 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1137 } 1138 CodeGenFunction CGF(CGM); 1139 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1140 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1141 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1142 Out->getLocation()); 1143 CodeGenFunction::OMPPrivateScope Scope(CGF); 1144 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1145 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1146 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1147 .getAddress(CGF); 1148 }); 1149 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1150 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1151 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1152 .getAddress(CGF); 1153 }); 1154 (void)Scope.Privatize(); 1155 if (!IsCombiner && Out->hasInit() && 1156 !CGF.isTrivialInitializer(Out->getInit())) { 1157 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1158 Out->getType().getQualifiers(), 1159 /*IsInitializer=*/true); 1160 } 1161 if (CombinerInitializer) 1162 CGF.EmitIgnoredExpr(CombinerInitializer); 1163 Scope.ForceCleanup(); 1164 CGF.FinishFunction(); 1165 return Fn; 1166 } 1167 1168 void CGOpenMPRuntime::emitUserDefinedReduction( 1169 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1170 if (UDRMap.count(D) > 0) 1171 return; 1172 llvm::Function *Combiner = emitCombinerOrInitializer( 1173 CGM, D->getType(), D->getCombiner(), 1174 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1175 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1176 /*IsCombiner=*/true); 1177 llvm::Function *Initializer = nullptr; 1178 if (const Expr *Init = D->getInitializer()) { 1179 Initializer = emitCombinerOrInitializer( 1180 CGM, D->getType(), 1181 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1182 : nullptr, 1183 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1184 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1185 /*IsCombiner=*/false); 1186 } 1187 UDRMap.try_emplace(D, Combiner, Initializer); 1188 if (CGF) { 1189 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1190 Decls.second.push_back(D); 1191 } 1192 } 1193 1194 std::pair<llvm::Function *, llvm::Function *> 1195 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1196 auto I = UDRMap.find(D); 1197 if (I != UDRMap.end()) 1198 return I->second; 1199 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1200 return UDRMap.lookup(D); 1201 } 1202 1203 namespace { 1204 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1205 // Builder if one is present. 1206 struct PushAndPopStackRAII { 1207 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1208 bool HasCancel) 1209 : OMPBuilder(OMPBuilder) { 1210 if (!OMPBuilder) 1211 return; 1212 1213 // The following callback is the crucial part of clangs cleanup process. 1214 // 1215 // NOTE: 1216 // Once the OpenMPIRBuilder is used to create parallel regions (and 1217 // similar), the cancellation destination (Dest below) is determined via 1218 // IP. That means if we have variables to finalize we split the block at IP, 1219 // use the new block (=BB) as destination to build a JumpDest (via 1220 // getJumpDestInCurrentScope(BB)) which then is fed to 1221 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1222 // to push & pop an FinalizationInfo object. 
1223 // The FiniCB will still be needed but at the point where the 1224 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1225 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1226 assert(IP.getBlock()->end() == IP.getPoint() && 1227 "Clang CG should cause non-terminated block!"); 1228 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1229 CGF.Builder.restoreIP(IP); 1230 CodeGenFunction::JumpDest Dest = 1231 CGF.getOMPCancelDestination(OMPD_parallel); 1232 CGF.EmitBranchThroughCleanup(Dest); 1233 }; 1234 1235 // TODO: Remove this once we emit parallel regions through the 1236 // OpenMPIRBuilder as it can do this setup internally. 1237 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1238 {FiniCB, OMPD_parallel, HasCancel}); 1239 OMPBuilder->pushFinalizationCB(std::move(FI)); 1240 } 1241 ~PushAndPopStackRAII() { 1242 if (OMPBuilder) 1243 OMPBuilder->popFinalizationCB(); 1244 } 1245 llvm::OpenMPIRBuilder *OMPBuilder; 1246 }; 1247 } // namespace 1248 1249 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1250 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1251 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1252 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1253 assert(ThreadIDVar->getType()->isPointerType() && 1254 "thread id variable must be of type kmp_int32 *"); 1255 CodeGenFunction CGF(CGM, true); 1256 bool HasCancel = false; 1257 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1258 HasCancel = OPD->hasCancel(); 1259 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1260 HasCancel = OPD->hasCancel(); 1261 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1262 HasCancel = OPSD->hasCancel(); 1263 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1264 HasCancel = OPFD->hasCancel(); 1265 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1266 HasCancel = 
OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // PSR pushes a finalization callback now and pops it when this function
  // returns.
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Generate the outlined function for the 'parallel' captured region of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Generate the outlined function for the 'teams' captured region of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Generate the outlined function for the task or taskloop region of
/// directive \p D. For untied tasks \p NumberOfParts receives the part count
/// reported by the untied-task action.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind
InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: it emits a call to
  // __kmpc_omp_task with the location, the thread id and the task pointer
  // loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Query the cancel flag from whichever concrete directive class D is.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only written for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the initializers in Data to Fields, one per field of RD, inserting
/// null values for LLVM struct elements that lie between consecutive fields.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
const RecordDecl *RD, const CGRecordLayout &RL, 1351 ArrayRef<llvm::Constant *> Data) { 1352 llvm::StructType *StructTy = RL.getLLVMType(); 1353 unsigned PrevIdx = 0; 1354 ConstantInitBuilder CIBuilder(CGM); 1355 auto DI = Data.begin(); 1356 for (const FieldDecl *FD : RD->fields()) { 1357 unsigned Idx = RL.getLLVMFieldNo(FD); 1358 // Fill the alignment. 1359 for (unsigned I = PrevIdx; I < Idx; ++I) 1360 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1361 PrevIdx = Idx + 1; 1362 Fields.add(*DI); 1363 ++DI; 1364 } 1365 } 1366 1367 template <class... As> 1368 static llvm::GlobalVariable * 1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1370 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1371 As &&... Args) { 1372 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1373 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1374 ConstantInitBuilder CIBuilder(CGM); 1375 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1376 buildStructValue(Fields, CGM, RD, RL, Data); 1377 return Fields.finishAndCreateGlobal( 1378 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1379 std::forward<As>(Args)...); 1380 } 1381 1382 template <typename T> 1383 static void 1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1385 ArrayRef<llvm::Constant *> Data, 1386 T &Parent) { 1387 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1388 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1389 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1390 buildStructValue(Fields, CGM, RD, RL, Data); 1391 Fields.finishAndAddTo(Parent); 1392 } 1393 1394 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1395 bool AtCurrentPoint) { 1396 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1397 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1398 1399 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1400 if (AtCurrentPoint) { 1401 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1402 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1403 } else { 1404 Elem.second.ServiceInsertPt = 1405 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1406 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1407 } 1408 } 1409 1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1411 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1412 if (Elem.second.ServiceInsertPt) { 1413 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1414 Elem.second.ServiceInsertPt = nullptr; 1415 Ptr->eraseFromParent(); 1416 } 1417 } 1418 1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1420 SourceLocation Loc, 1421 SmallString<128> &Buffer) { 1422 llvm::raw_svector_ostream OS(Buffer); 1423 // Build debug location 1424 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1425 OS << ";" << PLoc.getFilename() << ";"; 1426 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1427 OS << FD->getQualifiedNameAsString(); 1428 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1429 return OS.str(); 1430 } 1431 1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1433 SourceLocation Loc, 1434 unsigned Flags) { 1435 llvm::Constant *SrcLocStr; 1436 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1437 Loc.isInvalid()) { 1438 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1439 } else { 1440 std::string FunctionName = ""; 1441 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1442 FunctionName = FD->getQualifiedNameAsString(); 1443 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1444 const char *FileName = PLoc.getFilename(); 1445 unsigned Line = PLoc.getLine(); 1446 unsigned Column = PLoc.getColumn(); 1447 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Return the global thread id value for use at \p Loc in the current
/// function, reusing a cached value when one exists.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable only if at least one of these holds:
      // no landing pad is required, (C++) exceptions are disabled, we are
      // emitting into the top block, the pointer is not an instruction, or
      // the pointer is defined in the top block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1510 if (!Elem.second.ServiceInsertPt) 1511 setLocThreadIdInsertPt(CGF); 1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1514 llvm::CallInst *Call = CGF.Builder.CreateCall( 1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1516 OMPRTL___kmpc_global_thread_num), 1517 emitUpdateLocation(CGF, Loc)); 1518 Call->setCallingConv(CGF.getRuntimeCC()); 1519 Elem.second.ThreadID = Call; 1520 return Call; 1521 } 1522 1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1524 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1526 clearLocThreadIdInsertPt(CGF); 1527 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1528 } 1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1530 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1531 UDRMap.erase(D); 1532 FunctionUDRMap.erase(CGF.CurFn); 1533 } 1534 auto I = FunctionUDMMap.find(CGF.CurFn); 1535 if (I != FunctionUDMMap.end()) { 1536 for(const auto *D : I->second) 1537 UDMMap.erase(D); 1538 FunctionUDMMap.erase(I); 1539 } 1540 LastprivateConditionalToTypes.erase(CGF.CurFn); 1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1542 } 1543 1544 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1545 return OMPBuilder.IdentPtr; 1546 } 1547 1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1549 if (!Kmpc_MicroTy) { 1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1552 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1554 } 1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1556 } 1557 1558 llvm::FunctionCallee 1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1560 assert((IVSize == 32 || IVSize == 64) && 1561 "IV size is not compatible with the omp runtime"); 1562 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1563 : "__kmpc_for_static_init_4u") 1564 : (IVSigned ? "__kmpc_for_static_init_8" 1565 : "__kmpc_for_static_init_8u"); 1566 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1567 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1568 llvm::Type *TypeParams[] = { 1569 getIdentTyPointerTy(), // loc 1570 CGM.Int32Ty, // tid 1571 CGM.Int32Ty, // schedtype 1572 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1573 PtrTy, // p_lower 1574 PtrTy, // p_upper 1575 PtrTy, // p_stride 1576 ITy, // incr 1577 ITy // chunk 1578 }; 1579 auto *FnTy = 1580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1581 return CGM.CreateRuntimeFunction(FnTy, Name); 1582 } 1583 1584 llvm::FunctionCallee 1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1586 assert((IVSize == 32 || IVSize == 64) && 1587 "IV size is not compatible with the omp runtime"); 1588 StringRef Name = 1589 IVSize == 32 1590 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1591 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1592 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1593 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1594 CGM.Int32Ty, // tid 1595 CGM.Int32Ty, // schedtype 1596 ITy, // lower 1597 ITy, // upper 1598 ITy, // stride 1599 ITy // chunk 1600 }; 1601 auto *FnTy = 1602 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1603 return CGM.CreateRuntimeFunction(FnTy, Name); 1604 } 1605 1606 llvm::FunctionCallee 1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1608 assert((IVSize == 32 || IVSize == 64) && 1609 "IV size is not compatible with the omp runtime"); 1610 StringRef Name = 1611 IVSize == 32 1612 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1613 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1614 llvm::Type *TypeParams[] = { 1615 getIdentTyPointerTy(), // loc 1616 CGM.Int32Ty, // tid 1617 }; 1618 auto *FnTy = 1619 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1620 return CGM.CreateRuntimeFunction(FnTy, Name); 1621 } 1622 1623 llvm::FunctionCallee 1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1625 assert((IVSize == 32 || IVSize == 64) && 1626 "IV size is not compatible with the omp runtime"); 1627 StringRef Name = 1628 IVSize == 32 1629 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1630 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1631 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1632 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1633 llvm::Type *TypeParams[] = { 1634 getIdentTyPointerTy(), // loc 1635 CGM.Int32Ty, // tid 1636 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1637 PtrTy, // p_lower 1638 PtrTy, // p_upper 1639 PtrTy // p_stride 1640 }; 1641 auto *FnTy = 1642 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1643 return CGM.CreateRuntimeFunction(FnTy, Name); 1644 } 1645 1646 /// Obtain information that uniquely identifies a target entry. This 1647 /// consists of the file and device IDs as well as line number associated with 1648 /// the relevant entry source location. 1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1650 unsigned &DeviceID, unsigned &FileID, 1651 unsigned &LineNum) { 1652 SourceManager &SM = C.getSourceManager(); 1653 1654 // The loc should be always valid and have a file ID (the user cannot use 1655 // #pragma directives in macros) 1656 1657 assert(Loc.isValid() && "Source location is expected to be always valid."); 1658 1659 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1660 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1661 1662 llvm::sys::fs::UniqueID ID; 1663 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1664 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1666 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1667 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1668 << PLoc.getFilename() << EC.message(); 1669 } 1670 1671 DeviceID = ID.getDevice(); 1672 FileID = ID.getFile(); 1673 LineNum = PLoc.getLine(); 1674 } 1675 1676 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1677 if (CGM.getLangOpts().OpenMPSimd) 1678 return Address::invalid(); 1679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1680 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1681 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1682 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1683 HasRequiresUnifiedSharedMemory))) { 1684 SmallString<64> PtrName; 1685 { 1686 llvm::raw_svector_ostream OS(PtrName); 1687 OS << CGM.getMangledName(GlobalDecl(VD)); 1688 if (!VD->isExternallyVisible()) { 1689 unsigned DeviceID, FileID, Line; 1690 getTargetEntryUniqueInfo(CGM.getContext(), 1691 VD->getCanonicalDecl()->getBeginLoc(), 1692 DeviceID, FileID, Line); 1693 OS << llvm::format("_%x", FileID); 1694 } 1695 OS << "_decl_tgt_ref_ptr"; 1696 } 1697 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1698 if (!Ptr) { 1699 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1700 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1701 PtrName); 1702 1703 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1704 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1705 1706 if (!CGM.getLangOpts().OpenMPIsDevice) 1707 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1708 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1709 } 1710 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1711 } 1712 return Address::invalid(); 1713 } 1714 1715 llvm::Constant * 1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1717 assert(!CGM.getLangOpts().OpenMPUseTLS || 1718 !CGM.getContext().getTargetInfo().isTLSSupported()); 1719 // Lookup the entry, lazily creating it if necessary. 
1720 std::string Suffix = getName({"cache", ""}); 1721 return getOrCreateInternalVariable( 1722 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1723 } 1724 1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1726 const VarDecl *VD, 1727 Address VDAddr, 1728 SourceLocation Loc) { 1729 if (CGM.getLangOpts().OpenMPUseTLS && 1730 CGM.getContext().getTargetInfo().isTLSSupported()) 1731 return VDAddr; 1732 1733 llvm::Type *VarTy = VDAddr.getElementType(); 1734 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1735 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1736 CGM.Int8PtrTy), 1737 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1738 getOrCreateThreadPrivateCache(VD)}; 1739 return Address(CGF.EmitRuntimeCall( 1740 OMPBuilder.getOrCreateRuntimeFunction( 1741 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1742 Args), 1743 VDAddr.getAlignment()); 1744 } 1745 1746 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1747 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1748 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1749 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1750 // library. 1751 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1753 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1754 OMPLoc); 1755 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1756 // to register constructor/destructor for variable. 
1757 llvm::Value *Args[] = { 1758 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1759 Ctor, CopyCtor, Dtor}; 1760 CGF.EmitRuntimeCall( 1761 OMPBuilder.getOrCreateRuntimeFunction( 1762 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1763 Args); 1764 } 1765 1766 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1767 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1768 bool PerformInit, CodeGenFunction *CGF) { 1769 if (CGM.getLangOpts().OpenMPUseTLS && 1770 CGM.getContext().getTargetInfo().isTLSSupported()) 1771 return nullptr; 1772 1773 VD = VD->getDefinition(CGM.getContext()); 1774 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1775 QualType ASTTy = VD->getType(); 1776 1777 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1778 const Expr *Init = VD->getAnyInitializer(); 1779 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1780 // Generate function that re-emits the declaration's initializer into the 1781 // threadprivate copy of the variable VD 1782 CodeGenFunction CtorCGF(CGM); 1783 FunctionArgList Args; 1784 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1785 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1786 ImplicitParamDecl::Other); 1787 Args.push_back(&Dst); 1788 1789 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1790 CGM.getContext().VoidPtrTy, Args); 1791 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1792 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1793 llvm::Function *Fn = 1794 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1795 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1796 Args, Loc, Loc); 1797 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1798 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1799 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1800 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1801 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1802 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1803 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1804 /*IsInitializer=*/true); 1805 ArgVal = CtorCGF.EmitLoadOfScalar( 1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1807 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1808 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1809 CtorCGF.FinishFunction(); 1810 Ctor = Fn; 1811 } 1812 if (VD->getType().isDestructedType() != QualType::DK_none) { 1813 // Generate function that emits destructor call for the threadprivate copy 1814 // of the variable VD 1815 CodeGenFunction DtorCGF(CGM); 1816 FunctionArgList Args; 1817 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1818 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1819 ImplicitParamDecl::Other); 1820 Args.push_back(&Dst); 1821 1822 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1823 CGM.getContext().VoidTy, Args); 1824 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1825 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1826 llvm::Function *Fn = 1827 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1828 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1829 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1830 Loc, Loc); 1831 // Create a scope with an artificial location for the body of this function. 1832 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1833 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1834 DtorCGF.GetAddrOfLocalVar(&Dst), 1835 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1836 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1837 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1838 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1839 DtorCGF.FinishFunction(); 1840 Dtor = Fn; 1841 } 1842 // Do not emit init function if it is not required. 
1843 if (!Ctor && !Dtor) 1844 return nullptr; 1845 1846 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1847 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1848 /*isVarArg=*/false) 1849 ->getPointerTo(); 1850 // Copying constructor for the threadprivate variable. 1851 // Must be NULL - reserved by runtime, but currently it requires that this 1852 // parameter is always NULL. Otherwise it fires assertion. 1853 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1854 if (Ctor == nullptr) { 1855 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1856 /*isVarArg=*/false) 1857 ->getPointerTo(); 1858 Ctor = llvm::Constant::getNullValue(CtorTy); 1859 } 1860 if (Dtor == nullptr) { 1861 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Dtor = llvm::Constant::getNullValue(DtorTy); 1865 } 1866 if (!CGF) { 1867 auto *InitFunctionTy = 1868 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1869 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1870 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1871 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1872 CodeGenFunction InitCGF(CGM); 1873 FunctionArgList ArgList; 1874 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1875 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1876 Loc, Loc); 1877 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 InitCGF.FinishFunction(); 1879 return InitFunction; 1880 } 1881 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1882 } 1883 return nullptr; 1884 } 1885 1886 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1887 llvm::GlobalVariable *Addr, 1888 bool PerformInit) { 1889 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1890 !CGM.getLangOpts().OpenMPIsDevice) 1891 return false; 1892 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1893 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1894 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1895 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1896 HasRequiresUnifiedSharedMemory)) 1897 return CGM.getLangOpts().OpenMPIsDevice; 1898 VD = VD->getDefinition(CGM.getContext()); 1899 assert(VD && "Unknown VarDecl"); 1900 1901 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1902 return CGM.getLangOpts().OpenMPIsDevice; 1903 1904 QualType ASTTy = VD->getType(); 1905 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1906 1907 // Produce the unique prefix to identify the new target regions. We use 1908 // the source location of the variable declaration which we know to not 1909 // conflict with any target region. 1910 unsigned DeviceID; 1911 unsigned FileID; 1912 unsigned Line; 1913 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1914 SmallString<128> Buffer, Out; 1915 { 1916 llvm::raw_svector_ostream OS(Buffer); 1917 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1918 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1919 } 1920 1921 const Expr *Init = VD->getAnyInitializer(); 1922 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1923 llvm::Constant *Ctor; 1924 llvm::Constant *ID; 1925 if (CGM.getLangOpts().OpenMPIsDevice) { 1926 // Generate function that re-emits the declaration's initializer into 1927 // the threadprivate copy of the variable VD 1928 CodeGenFunction CtorCGF(CGM); 1929 1930 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1931 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1932 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1933 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1934 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1935 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1936 FunctionArgList(), Loc, Loc); 1937 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1938 CtorCGF.EmitAnyExprToMem(Init, 1939 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1940 Init->getType().getQualifiers(), 1941 /*IsInitializer=*/true); 1942 CtorCGF.FinishFunction(); 1943 Ctor = Fn; 1944 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1945 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1946 } else { 1947 Ctor = new llvm::GlobalVariable( 1948 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1949 llvm::GlobalValue::PrivateLinkage, 1950 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1951 ID = Ctor; 1952 } 1953 1954 // Register the information for the entry associated with the constructor. 1955 Out.clear(); 1956 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1957 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1958 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1959 } 1960 if (VD->getType().isDestructedType() != QualType::DK_none) { 1961 llvm::Constant *Dtor; 1962 llvm::Constant *ID; 1963 if (CGM.getLangOpts().OpenMPIsDevice) { 1964 // Generate function that emits destructor call for the threadprivate 1965 // copy of the variable VD 1966 CodeGenFunction DtorCGF(CGM); 1967 1968 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1969 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1970 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1971 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1972 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1973 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1974 FunctionArgList(), Loc, Loc); 1975 // Create a scope with an artificial location for the body of this 1976 // function. 
1977 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1978 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1979 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1980 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1981 DtorCGF.FinishFunction(); 1982 Dtor = Fn; 1983 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1984 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1985 } else { 1986 Dtor = new llvm::GlobalVariable( 1987 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1988 llvm::GlobalValue::PrivateLinkage, 1989 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1990 ID = Dtor; 1991 } 1992 // Register the information for the entry associated with the destructor. 1993 Out.clear(); 1994 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1995 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1996 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1997 } 1998 return CGM.getLangOpts().OpenMPIsDevice; 1999 } 2000 2001 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2002 QualType VarType, 2003 StringRef Name) { 2004 std::string Suffix = getName({"artificial", ""}); 2005 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2006 llvm::Value *GAddr = 2007 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2008 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2009 CGM.getTarget().isTLSSupported()) { 2010 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2011 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2012 } 2013 std::string CacheSuffix = getName({"cache", ""}); 2014 llvm::Value *Args[] = { 2015 emitUpdateLocation(CGF, SourceLocation()), 2016 getThreadID(CGF, SourceLocation()), 2017 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2018 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2019 /*isSigned=*/false), 2020 
getOrCreateInternalVariable( 2021 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2022 return Address( 2023 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2024 CGF.EmitRuntimeCall( 2025 OMPBuilder.getOrCreateRuntimeFunction( 2026 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2027 Args), 2028 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2029 CGM.getContext().getTypeAlignInChars(VarType)); 2030 } 2031 2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2033 const RegionCodeGenTy &ThenGen, 2034 const RegionCodeGenTy &ElseGen) { 2035 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2036 2037 // If the condition constant folds and can be elided, try to avoid emitting 2038 // the condition and the dead arm of the if/else. 2039 bool CondConstant; 2040 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2041 if (CondConstant) 2042 ThenGen(CGF); 2043 else 2044 ElseGen(CGF); 2045 return; 2046 } 2047 2048 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2049 // emit the conditional branch. 2050 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2051 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2052 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2053 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2054 2055 // Emit the 'then' code. 2056 CGF.EmitBlock(ThenBlock); 2057 ThenGen(CGF); 2058 CGF.EmitBranch(ContBlock); 2059 // Emit the 'else' code if present. 2060 // There is no need to emit line number for unconditional branch. 2061 (void)ApplyDebugLocation::CreateEmpty(CGF); 2062 CGF.EmitBlock(ElseBlock); 2063 ElseGen(CGF); 2064 // There is no need to emit line number for unconditional branch. 2065 (void)ApplyDebugLocation::CreateEmpty(CGF); 2066 CGF.EmitBranch(ContBlock); 2067 // Emit the continuation block for code after the if. 
2068 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2069 } 2070 2071 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2072 llvm::Function *OutlinedFn, 2073 ArrayRef<llvm::Value *> CapturedVars, 2074 const Expr *IfCond) { 2075 if (!CGF.HaveInsertPoint()) 2076 return; 2077 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2078 auto &M = CGM.getModule(); 2079 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2082 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2083 llvm::Value *Args[] = { 2084 RTLoc, 2085 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2086 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2087 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2088 RealArgs.append(std::begin(Args), std::end(Args)); 2089 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2090 2091 llvm::FunctionCallee RTLFn = 2092 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2093 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2094 }; 2095 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2096 this](CodeGenFunction &CGF, PrePostActionTy &) { 2097 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2098 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2099 // Build calls: 2100 // __kmpc_serialized_parallel(&Loc, GTid); 2101 llvm::Value *Args[] = {RTLoc, ThreadID}; 2102 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2103 M, OMPRTL___kmpc_serialized_parallel), 2104 Args); 2105 2106 // OutlinedFn(&gtid, &zero_bound, CapturedStruct); 2107 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2108 Address ZeroAddrBound = 2109 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2110 /*Name=*/".bound.zero.addr"); 2111 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2112 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2113 // ThreadId 
for serialized parallels is 0. 2114 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2115 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2116 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2117 2118 // Ensure we do not inline the function. This is trivially true for the ones 2119 // passed to __kmpc_fork_call but the ones called in serialized regions 2120 // could be inlined. This is not perfect, but it is closer to the invariant 2121 // we want, namely, every data environment starts with a new function. 2122 // TODO: We should pass the if condition to the runtime function and do the 2123 // handling there. Much cleaner code. 2124 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2125 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2126 2127 // __kmpc_end_serialized_parallel(&Loc, GTid); 2128 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2129 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2130 M, OMPRTL___kmpc_end_serialized_parallel), 2131 EndArgs); 2132 }; 2133 if (IfCond) { 2134 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2135 } else { 2136 RegionCodeGenTy ThenRCG(ThenGen); 2137 ThenRCG(CGF); 2138 } 2139 } 2140 2141 // If we're inside an (outlined) parallel region, use the region info's 2142 // thread-ID variable (it is passed as the first argument of the outlined function 2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel region, but in 2144 // regular serial code region, get thread ID by calling kmp_int32 2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2146 // return the address of that temp. 
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2148 SourceLocation Loc) { 2149 if (auto *OMPRegionInfo = 2150 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 if (OMPRegionInfo->getThreadIDVariable()) 2152 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2153 2154 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2155 QualType Int32Ty = 2156 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2157 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2158 CGF.EmitStoreOfScalar(ThreadID, 2159 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2160 2161 return ThreadIDTemp; 2162 } 2163 2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2165 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2166 SmallString<256> Buffer; 2167 llvm::raw_svector_ostream Out(Buffer); 2168 Out << Name; 2169 StringRef RuntimeName = Out.str(); 2170 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2171 if (Elem.second) { 2172 assert(Elem.second->getType()->getPointerElementType() == Ty && 2173 "OMP internal variable has different type than requested"); 2174 return &*Elem.second; 2175 } 2176 2177 return Elem.second = new llvm::GlobalVariable( 2178 CGM.getModule(), Ty, /*IsConstant*/ false, 2179 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2180 Elem.first(), /*InsertBefore=*/nullptr, 2181 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2182 } 2183 2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2185 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2186 std::string Name = getName({Prefix, "var"}); 2187 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2188 } 2189 2190 namespace { 2191 /// Common pre(post)-action for different OpenMP constructs. 
2192 class CommonActionTy final : public PrePostActionTy { 2193 llvm::FunctionCallee EnterCallee; 2194 ArrayRef<llvm::Value *> EnterArgs; 2195 llvm::FunctionCallee ExitCallee; 2196 ArrayRef<llvm::Value *> ExitArgs; 2197 bool Conditional; 2198 llvm::BasicBlock *ContBlock = nullptr; 2199 2200 public: 2201 CommonActionTy(llvm::FunctionCallee EnterCallee, 2202 ArrayRef<llvm::Value *> EnterArgs, 2203 llvm::FunctionCallee ExitCallee, 2204 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2205 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2206 ExitArgs(ExitArgs), Conditional(Conditional) {} 2207 void Enter(CodeGenFunction &CGF) override { 2208 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2209 if (Conditional) { 2210 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2211 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2212 ContBlock = CGF.createBasicBlock("omp_if.end"); 2213 // Generate the branch (If-stmt) 2214 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2215 CGF.EmitBlock(ThenBlock); 2216 } 2217 } 2218 void Done(CodeGenFunction &CGF) { 2219 // Emit the rest of blocks/branches 2220 CGF.EmitBranch(ContBlock); 2221 CGF.EmitBlock(ContBlock, true); 2222 } 2223 void Exit(CodeGenFunction &CGF) override { 2224 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2225 } 2226 }; 2227 } // anonymous namespace 2228 2229 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2230 StringRef CriticalName, 2231 const RegionCodeGenTy &CriticalOpGen, 2232 SourceLocation Loc, const Expr *Hint) { 2233 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2234 // CriticalOpGen(); 2235 // __kmpc_end_critical(ident_t *, gtid, Lock); 2236 // Prepare arguments and build a call to __kmpc_critical 2237 if (!CGF.HaveInsertPoint()) 2238 return; 2239 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2240 getCriticalRegionLock(CriticalName)}; 2241 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2242 std::end(Args)); 2243 if (Hint) { 2244 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2245 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2246 } 2247 CommonActionTy Action( 2248 OMPBuilder.getOrCreateRuntimeFunction( 2249 CGM.getModule(), 2250 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2251 EnterArgs, 2252 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2253 OMPRTL___kmpc_end_critical), 2254 Args); 2255 CriticalOpGen.setAction(Action); 2256 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2257 } 2258 2259 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2260 const RegionCodeGenTy &MasterOpGen, 2261 SourceLocation Loc) { 2262 if (!CGF.HaveInsertPoint()) 2263 return; 2264 // if(__kmpc_master(ident_t *, gtid)) { 2265 // MasterOpGen(); 2266 // __kmpc_end_master(ident_t *, gtid); 2267 // } 2268 // Prepare arguments and build a call to __kmpc_master 2269 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2270 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2271 CGM.getModule(), OMPRTL___kmpc_master), 2272 Args, 2273 OMPBuilder.getOrCreateRuntimeFunction( 2274 CGM.getModule(), OMPRTL___kmpc_end_master), 2275 Args, 2276 /*Conditional=*/true); 2277 MasterOpGen.setAction(Action); 2278 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2279 Action.Done(CGF); 2280 } 2281 2282 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2283 SourceLocation Loc) { 2284 if (!CGF.HaveInsertPoint()) 2285 return; 2286 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2287 OMPBuilder.createTaskyield(CGF.Builder); 2288 } else { 2289 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2290 llvm::Value *Args[] = { 2291 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2292 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2293 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2294 
CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2295 Args); 2296 } 2297 2298 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2299 Region->emitUntiedSwitch(CGF); 2300 } 2301 2302 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2303 const RegionCodeGenTy &TaskgroupOpGen, 2304 SourceLocation Loc) { 2305 if (!CGF.HaveInsertPoint()) 2306 return; 2307 // __kmpc_taskgroup(ident_t *, gtid); 2308 // TaskgroupOpGen(); 2309 // __kmpc_end_taskgroup(ident_t *, gtid); 2310 // Prepare arguments and build a call to __kmpc_taskgroup 2311 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2312 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2313 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2314 Args, 2315 OMPBuilder.getOrCreateRuntimeFunction( 2316 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2317 Args); 2318 TaskgroupOpGen.setAction(Action); 2319 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2320 } 2321 2322 /// Given an array of pointers to variables, project the address of a 2323 /// given variable. 2324 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2325 unsigned Index, const VarDecl *Var) { 2326 // Pull out the pointer to the variable. 
2327 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2328 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2329 2330 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2331 Addr = CGF.Builder.CreateElementBitCast( 2332 Addr, CGF.ConvertTypeForMem(Var->getType())); 2333 return Addr; 2334 } 2335 2336 static llvm::Value *emitCopyprivateCopyFunction( 2337 CodeGenModule &CGM, llvm::Type *ArgsType, 2338 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2339 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2340 SourceLocation Loc) { 2341 ASTContext &C = CGM.getContext(); 2342 // void copy_func(void *LHSArg, void *RHSArg); 2343 FunctionArgList Args; 2344 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2345 ImplicitParamDecl::Other); 2346 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2347 ImplicitParamDecl::Other); 2348 Args.push_back(&LHSArg); 2349 Args.push_back(&RHSArg); 2350 const auto &CGFI = 2351 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2352 std::string Name = 2353 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2354 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2355 llvm::GlobalValue::InternalLinkage, Name, 2356 &CGM.getModule()); 2357 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2358 Fn->setDoesNotRecurse(); 2359 CodeGenFunction CGF(CGM); 2360 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2361 // Dest = (void*[n])(LHSArg); 2362 // Src = (void*[n])(RHSArg); 2363 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2364 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2365 ArgsType), CGF.getPointerAlign()); 2366 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2367 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2368 ArgsType), CGF.getPointerAlign()); 2369 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 2370 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2371 // ... 2372 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2373 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2374 const auto *DestVar = 2375 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2376 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2377 2378 const auto *SrcVar = 2379 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2380 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2381 2382 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2383 QualType Type = VD->getType(); 2384 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2385 } 2386 CGF.FinishFunction(); 2387 return Fn; 2388 } 2389 2390 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2391 const RegionCodeGenTy &SingleOpGen, 2392 SourceLocation Loc, 2393 ArrayRef<const Expr *> CopyprivateVars, 2394 ArrayRef<const Expr *> SrcExprs, 2395 ArrayRef<const Expr *> DstExprs, 2396 ArrayRef<const Expr *> AssignmentOps) { 2397 if (!CGF.HaveInsertPoint()) 2398 return; 2399 assert(CopyprivateVars.size() == SrcExprs.size() && 2400 CopyprivateVars.size() == DstExprs.size() && 2401 CopyprivateVars.size() == AssignmentOps.size()); 2402 ASTContext &C = CGM.getContext(); 2403 // int32 did_it = 0; 2404 // if(__kmpc_single(ident_t *, gtid)) { 2405 // SingleOpGen(); 2406 // __kmpc_end_single(ident_t *, gtid); 2407 // did_it = 1; 2408 // } 2409 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2410 // <copy_func>, did_it); 2411 2412 Address DidIt = Address::invalid(); 2413 if (!CopyprivateVars.empty()) { 2414 // int32 did_it = 0; 2415 QualType KmpInt32Ty = 2416 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2417 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2418 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2419 } 2420 // Prepare arguments and build a call to __kmpc_single 2421 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2422 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2423 CGM.getModule(), OMPRTL___kmpc_single), 2424 Args, 2425 OMPBuilder.getOrCreateRuntimeFunction( 2426 CGM.getModule(), OMPRTL___kmpc_end_single), 2427 Args, 2428 /*Conditional=*/true); 2429 SingleOpGen.setAction(Action); 2430 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2431 if (DidIt.isValid()) { 2432 // did_it = 1; 2433 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2434 } 2435 Action.Done(CGF); 2436 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2437 // <copy_func>, did_it); 2438 if (DidIt.isValid()) { 2439 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2440 QualType CopyprivateArrayTy = C.getConstantArrayType( 2441 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2442 /*IndexTypeQuals=*/0); 2443 // Create a list of all private variables for copyprivate. 2444 Address CopyprivateList = 2445 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2446 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2447 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2448 CGF.Builder.CreateStore( 2449 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2450 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2451 CGF.VoidPtrTy), 2452 Elem); 2453 } 2454 // Build function that copies private values from single region to all other 2455 // threads in the corresponding parallel region. 
2456 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2457 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2458 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2459 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2460 Address CL = 2461 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2462 CGF.VoidPtrTy); 2463 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2464 llvm::Value *Args[] = { 2465 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2466 getThreadID(CGF, Loc), // i32 <gtid> 2467 BufSize, // size_t <buf_size> 2468 CL.getPointer(), // void *<copyprivate list> 2469 CpyFn, // void (*) (void *, void *) <copy_func> 2470 DidItVal // i32 did_it 2471 }; 2472 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2473 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2474 Args); 2475 } 2476 } 2477 2478 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2479 const RegionCodeGenTy &OrderedOpGen, 2480 SourceLocation Loc, bool IsThreads) { 2481 if (!CGF.HaveInsertPoint()) 2482 return; 2483 // __kmpc_ordered(ident_t *, gtid); 2484 // OrderedOpGen(); 2485 // __kmpc_end_ordered(ident_t *, gtid); 2486 // Prepare arguments and build a call to __kmpc_ordered 2487 if (IsThreads) { 2488 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2489 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2490 CGM.getModule(), OMPRTL___kmpc_ordered), 2491 Args, 2492 OMPBuilder.getOrCreateRuntimeFunction( 2493 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2494 Args); 2495 OrderedOpGen.setAction(Action); 2496 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2497 return; 2498 } 2499 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2500 } 2501 2502 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2503 unsigned Flags; 2504 if (Kind == OMPD_for) 2505 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2506 else if (Kind == OMPD_sections) 2507 Flags = 
OMP_IDENT_BARRIER_IMPL_SECTIONS; 2508 else if (Kind == OMPD_single) 2509 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2510 else if (Kind == OMPD_barrier) 2511 Flags = OMP_IDENT_BARRIER_EXPL; 2512 else 2513 Flags = OMP_IDENT_BARRIER_IMPL; 2514 return Flags; 2515 } 2516 2517 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2518 CodeGenFunction &CGF, const OMPLoopDirective &S, 2519 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2520 // Check if the loop directive is actually a doacross loop directive. In this 2521 // case choose static, 1 schedule. 2522 if (llvm::any_of( 2523 S.getClausesOfKind<OMPOrderedClause>(), 2524 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2525 ScheduleKind = OMPC_SCHEDULE_static; 2526 // Chunk size is 1 in this case. 2527 llvm::APInt ChunkSize(32, 1); 2528 ChunkExpr = IntegerLiteral::Create( 2529 CGF.getContext(), ChunkSize, 2530 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2531 SourceLocation()); 2532 } 2533 } 2534 2535 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2536 OpenMPDirectiveKind Kind, bool EmitChecks, 2537 bool ForceSimpleCall) { 2538 // Check if we should use the OMPBuilder 2539 auto *OMPRegionInfo = 2540 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2541 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2542 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2543 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2544 return; 2545 } 2546 2547 if (!CGF.HaveInsertPoint()) 2548 return; 2549 // Build call __kmpc_cancel_barrier(loc, thread_id); 2550 // Build call __kmpc_barrier(loc, thread_id); 2551 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2552 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2553 // thread_id); 2554 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2555 getThreadID(CGF, Loc)}; 2556 if (OMPRegionInfo) { 2557 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2558 llvm::Value 
*Result = CGF.EmitRuntimeCall( 2559 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2560 OMPRTL___kmpc_cancel_barrier), 2561 Args); 2562 if (EmitChecks) { 2563 // if (__kmpc_cancel_barrier()) { 2564 // exit from construct; 2565 // } 2566 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2567 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2568 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2569 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2570 CGF.EmitBlock(ExitBB); 2571 // exit from construct; 2572 CodeGenFunction::JumpDest CancelDestination = 2573 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2574 CGF.EmitBranchThroughCleanup(CancelDestination); 2575 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2576 } 2577 return; 2578 } 2579 } 2580 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2581 CGM.getModule(), OMPRTL___kmpc_barrier), 2582 Args); 2583 } 2584 2585 /// Map the OpenMP loop schedule to the runtime enumeration. 2586 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2587 bool Chunked, bool Ordered) { 2588 switch (ScheduleKind) { 2589 case OMPC_SCHEDULE_static: 2590 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2591 : (Ordered ? OMP_ord_static : OMP_sch_static); 2592 case OMPC_SCHEDULE_dynamic: 2593 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2594 case OMPC_SCHEDULE_guided: 2595 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2596 case OMPC_SCHEDULE_runtime: 2597 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2598 case OMPC_SCHEDULE_auto: 2599 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2600 case OMPC_SCHEDULE_unknown: 2601 assert(!Chunked && "chunk was specified but schedule kind not known"); 2602 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2603 } 2604 llvm_unreachable("Unexpected runtime schedule"); 2605 } 2606 2607 /// Map the OpenMP distribute schedule to the runtime enumeration. 2608 static OpenMPSchedType 2609 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2610 // only static is allowed for dist_schedule 2611 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2612 } 2613 2614 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2615 bool Chunked) const { 2616 OpenMPSchedType Schedule = 2617 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2618 return Schedule == OMP_sch_static; 2619 } 2620 2621 bool CGOpenMPRuntime::isStaticNonchunked( 2622 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2623 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2624 return Schedule == OMP_dist_sch_static; 2625 } 2626 2627 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2628 bool Chunked) const { 2629 OpenMPSchedType Schedule = 2630 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2631 return Schedule == OMP_sch_static_chunked; 2632 } 2633 2634 bool CGOpenMPRuntime::isStaticChunked( 2635 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2636 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2637 return Schedule == OMP_dist_sch_static_chunked; 2638 } 2639 2640 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2641 OpenMPSchedType Schedule = 2642 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2643 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2644 return Schedule != OMP_sch_static; 2645 } 2646 2647 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2648 OpenMPScheduleClauseModifier M1, 2649 OpenMPScheduleClauseModifier M2) { 2650 int Modifier = 0; 2651 switch (M1) { 2652 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2653 Modifier = OMP_sch_modifier_monotonic; 2654 break; 2655 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2656 Modifier = OMP_sch_modifier_nonmonotonic; 2657 break; 2658 case OMPC_SCHEDULE_MODIFIER_simd: 2659 if (Schedule == OMP_sch_static_chunked) 2660 Schedule = OMP_sch_static_balanced_chunked; 2661 break; 2662 case OMPC_SCHEDULE_MODIFIER_last: 2663 case OMPC_SCHEDULE_MODIFIER_unknown: 2664 break; 2665 } 2666 switch (M2) { 2667 case OMPC_SCHEDULE_MODIFIER_monotonic: 2668 Modifier = OMP_sch_modifier_monotonic; 2669 break; 2670 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2671 Modifier = OMP_sch_modifier_nonmonotonic; 2672 break; 2673 case OMPC_SCHEDULE_MODIFIER_simd: 2674 if (Schedule == OMP_sch_static_chunked) 2675 Schedule = OMP_sch_static_balanced_chunked; 2676 break; 2677 case OMPC_SCHEDULE_MODIFIER_last: 2678 case OMPC_SCHEDULE_MODIFIER_unknown: 2679 break; 2680 } 2681 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2682 // If the static schedule kind is specified or if the ordered clause is 2683 // specified, and if the nonmonotonic modifier is not specified, the effect is 2684 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2685 // modifier is specified, the effect is as if the nonmonotonic modifier is 2686 // specified. 
2687 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2688 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2689 Schedule == OMP_sch_static_balanced_chunked || 2690 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2691 Schedule == OMP_dist_sch_static_chunked || 2692 Schedule == OMP_dist_sch_static)) 2693 Modifier = OMP_sch_modifier_nonmonotonic; 2694 } 2695 return Schedule | Modifier; 2696 } 2697 2698 void CGOpenMPRuntime::emitForDispatchInit( 2699 CodeGenFunction &CGF, SourceLocation Loc, 2700 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2701 bool Ordered, const DispatchRTInput &DispatchValues) { 2702 if (!CGF.HaveInsertPoint()) 2703 return; 2704 OpenMPSchedType Schedule = getRuntimeSchedule( 2705 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2706 assert(Ordered || 2707 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2708 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2709 Schedule != OMP_sch_static_balanced_chunked)); 2710 // Call __kmpc_dispatch_init( 2711 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2712 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2713 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2714 2715 // If the Chunk was not specified in the clause - use default value 1. 2716 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2717 : CGF.Builder.getIntN(IVSize, 1); 2718 llvm::Value *Args[] = { 2719 emitUpdateLocation(CGF, Loc), 2720 getThreadID(CGF, Loc), 2721 CGF.Builder.getInt32(addMonoNonMonoModifier( 2722 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2723 DispatchValues.LB, // Lower 2724 DispatchValues.UB, // Upper 2725 CGF.Builder.getIntN(IVSize, 1), // Stride 2726 Chunk // Chunk 2727 }; 2728 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2729 } 2730 2731 static void emitForStaticInitCall( 2732 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2733 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2734 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2735 const CGOpenMPRuntime::StaticRTInput &Values) { 2736 if (!CGF.HaveInsertPoint()) 2737 return; 2738 2739 assert(!Values.Ordered); 2740 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2741 Schedule == OMP_sch_static_balanced_chunked || 2742 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2743 Schedule == OMP_dist_sch_static || 2744 Schedule == OMP_dist_sch_static_chunked); 2745 2746 // Call __kmpc_for_static_init( 2747 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2748 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2749 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2750 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2751 llvm::Value *Chunk = Values.Chunk; 2752 if (Chunk == nullptr) { 2753 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2754 Schedule == OMP_dist_sch_static) && 2755 "expected static non-chunked schedule"); 2756 // If the Chunk was not specified in the clause - use default value 1. 
2757 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2758 } else { 2759 assert((Schedule == OMP_sch_static_chunked || 2760 Schedule == OMP_sch_static_balanced_chunked || 2761 Schedule == OMP_ord_static_chunked || 2762 Schedule == OMP_dist_sch_static_chunked) && 2763 "expected static chunked schedule"); 2764 } 2765 llvm::Value *Args[] = { 2766 UpdateLocation, 2767 ThreadId, 2768 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2769 M2)), // Schedule type 2770 Values.IL.getPointer(), // &isLastIter 2771 Values.LB.getPointer(), // &LB 2772 Values.UB.getPointer(), // &UB 2773 Values.ST.getPointer(), // &Stride 2774 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2775 Chunk // Chunk 2776 }; 2777 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2778 } 2779 2780 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2781 SourceLocation Loc, 2782 OpenMPDirectiveKind DKind, 2783 const OpenMPScheduleTy &ScheduleKind, 2784 const StaticRTInput &Values) { 2785 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2786 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2787 assert(isOpenMPWorksharingDirective(DKind) && 2788 "Expected loop-based or sections-based directive."); 2789 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2790 isOpenMPLoopDirective(DKind) 2791 ? 
OMP_IDENT_WORK_LOOP 2792 : OMP_IDENT_WORK_SECTIONS); 2793 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2794 llvm::FunctionCallee StaticInitFunction = 2795 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2796 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2797 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2798 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2799 } 2800 2801 void CGOpenMPRuntime::emitDistributeStaticInit( 2802 CodeGenFunction &CGF, SourceLocation Loc, 2803 OpenMPDistScheduleClauseKind SchedKind, 2804 const CGOpenMPRuntime::StaticRTInput &Values) { 2805 OpenMPSchedType ScheduleNum = 2806 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2807 llvm::Value *UpdatedLocation = 2808 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2809 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2810 llvm::FunctionCallee StaticInitFunction = 2811 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2812 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2813 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2814 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2815 } 2816 2817 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2818 SourceLocation Loc, 2819 OpenMPDirectiveKind DKind) { 2820 if (!CGF.HaveInsertPoint()) 2821 return; 2822 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2823 llvm::Value *Args[] = { 2824 emitUpdateLocation(CGF, Loc, 2825 isOpenMPDistributeDirective(DKind) 2826 ? OMP_IDENT_WORK_DISTRIBUTE 2827 : isOpenMPLoopDirective(DKind) 2828 ? 
OMP_IDENT_WORK_LOOP 2829 : OMP_IDENT_WORK_SECTIONS), 2830 getThreadID(CGF, Loc)}; 2831 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2832 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2833 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2834 Args); 2835 } 2836 2837 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2838 SourceLocation Loc, 2839 unsigned IVSize, 2840 bool IVSigned) { 2841 if (!CGF.HaveInsertPoint()) 2842 return; 2843 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2844 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2845 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2846 } 2847 2848 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2849 SourceLocation Loc, unsigned IVSize, 2850 bool IVSigned, Address IL, 2851 Address LB, Address UB, 2852 Address ST) { 2853 // Call __kmpc_dispatch_next( 2854 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2855 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2856 // kmp_int[32|64] *p_stride); 2857 llvm::Value *Args[] = { 2858 emitUpdateLocation(CGF, Loc), 2859 getThreadID(CGF, Loc), 2860 IL.getPointer(), // &isLastIter 2861 LB.getPointer(), // &Lower 2862 UB.getPointer(), // &Upper 2863 ST.getPointer() // &Stride 2864 }; 2865 llvm::Value *Call = 2866 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2867 return CGF.EmitScalarConversion( 2868 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2869 CGF.getContext().BoolTy, Loc); 2870 } 2871 2872 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2873 llvm::Value *NumThreads, 2874 SourceLocation Loc) { 2875 if (!CGF.HaveInsertPoint()) 2876 return; 2877 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2878 llvm::Value *Args[] = { 2879 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2880 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2882 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2883 Args); 2884 } 2885 2886 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2887 ProcBindKind ProcBind, 2888 SourceLocation Loc) { 2889 if (!CGF.HaveInsertPoint()) 2890 return; 2891 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2892 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2893 llvm::Value *Args[] = { 2894 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2895 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2896 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2897 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2898 Args); 2899 } 2900 2901 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2902 SourceLocation Loc, llvm::AtomicOrdering AO) { 2903 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2904 OMPBuilder.createFlush(CGF.Builder); 2905 } else { 2906 if (!CGF.HaveInsertPoint()) 2907 return; 2908 // Build call void __kmpc_flush(ident_t *loc) 2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2910 CGM.getModule(), OMPRTL___kmpc_flush), 2911 emitUpdateLocation(CGF, Loc)); 2912 } 2913 } 2914 2915 namespace { 2916 /// Indexes of fields for type kmp_task_t. 2917 enum KmpTaskTFields { 2918 /// List of shared variables. 2919 KmpTaskTShareds, 2920 /// Task routine. 2921 KmpTaskTRoutine, 2922 /// Partition id for the untied tasks. 2923 KmpTaskTPartId, 2924 /// Function with call of destructors for private variables. 2925 Data1, 2926 /// Task priority. 2927 Data2, 2928 /// (Taskloops only) Lower bound. 2929 KmpTaskTLowerBound, 2930 /// (Taskloops only) Upper bound. 2931 KmpTaskTUpperBound, 2932 /// (Taskloops only) Stride. 2933 KmpTaskTStride, 2934 /// (Taskloops only) Is last iteration flag. 2935 KmpTaskTLastIter, 2936 /// (Taskloops only) Reduction data. 
2937 KmpTaskTReductions, 2938 }; 2939 } // anonymous namespace 2940 2941 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2942 return OffloadEntriesTargetRegion.empty() && 2943 OffloadEntriesDeviceGlobalVar.empty(); 2944 } 2945 2946 /// Initialize target region entry. 2947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2948 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2949 StringRef ParentName, unsigned LineNum, 2950 unsigned Order) { 2951 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2952 "only required for the device " 2953 "code generation."); 2954 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2955 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2956 OMPTargetRegionEntryTargetRegion); 2957 ++OffloadingEntriesNum; 2958 } 2959 2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2961 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2962 StringRef ParentName, unsigned LineNum, 2963 llvm::Constant *Addr, llvm::Constant *ID, 2964 OMPTargetRegionEntryKind Flags) { 2965 // If we are emitting code for a target, the entry is already initialized, 2966 // only has to be registered. 2967 if (CGM.getLangOpts().OpenMPIsDevice) { 2968 // This could happen if the device compilation is invoked standalone. 
2969 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2970 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2971 OffloadingEntriesNum); 2972 auto &Entry = 2973 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2974 Entry.setAddress(Addr); 2975 Entry.setID(ID); 2976 Entry.setFlags(Flags); 2977 } else { 2978 if (Flags == 2979 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2980 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2981 /*IgnoreAddressId*/ true)) 2982 return; 2983 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2984 "Target region entry already registered!"); 2985 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2986 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2987 ++OffloadingEntriesNum; 2988 } 2989 } 2990 2991 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2992 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2993 bool IgnoreAddressId) const { 2994 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2995 if (PerDevice == OffloadEntriesTargetRegion.end()) 2996 return false; 2997 auto PerFile = PerDevice->second.find(FileID); 2998 if (PerFile == PerDevice->second.end()) 2999 return false; 3000 auto PerParentName = PerFile->second.find(ParentName); 3001 if (PerParentName == PerFile->second.end()) 3002 return false; 3003 auto PerLine = PerParentName->second.find(LineNum); 3004 if (PerLine == PerParentName->second.end()) 3005 return false; 3006 // Fail if this entry is already registered. 
3007 if (!IgnoreAddressId && 3008 (PerLine->second.getAddress() || PerLine->second.getID())) 3009 return false; 3010 return true; 3011 } 3012 3013 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3014 const OffloadTargetRegionEntryInfoActTy &Action) { 3015 // Scan all target region entries and perform the provided action. 3016 for (const auto &D : OffloadEntriesTargetRegion) 3017 for (const auto &F : D.second) 3018 for (const auto &P : F.second) 3019 for (const auto &L : P.second) 3020 Action(D.first, F.first, P.first(), L.first, L.second); 3021 } 3022 3023 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3024 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3025 OMPTargetGlobalVarEntryKind Flags, 3026 unsigned Order) { 3027 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3028 "only required for the device " 3029 "code generation."); 3030 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3031 ++OffloadingEntriesNum; 3032 } 3033 3034 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3035 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3036 CharUnits VarSize, 3037 OMPTargetGlobalVarEntryKind Flags, 3038 llvm::GlobalValue::LinkageTypes Linkage) { 3039 if (CGM.getLangOpts().OpenMPIsDevice) { 3040 // This could happen if the device compilation is invoked standalone. 
3041 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3042 initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); 3043 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3044 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3045 "Resetting with the new address."); 3046 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3047 if (Entry.getVarSize().isZero()) { 3048 Entry.setVarSize(VarSize); 3049 Entry.setLinkage(Linkage); 3050 } 3051 return; 3052 } 3053 Entry.setVarSize(VarSize); 3054 Entry.setLinkage(Linkage); 3055 Entry.setAddress(Addr); 3056 } else { 3057 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3058 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3059 assert(Entry.isValid() && Entry.getFlags() == Flags && 3060 "Entry not initialized!"); 3061 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3062 "Resetting with the new address."); 3063 if (Entry.getVarSize().isZero()) { 3064 Entry.setVarSize(VarSize); 3065 Entry.setLinkage(Linkage); 3066 } 3067 return; 3068 } 3069 OffloadEntriesDeviceGlobalVar.try_emplace( 3070 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3071 ++OffloadingEntriesNum; 3072 } 3073 } 3074 3075 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3076 actOnDeviceGlobalVarEntriesInfo( 3077 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3078 // Scan all target region entries and perform the provided action. 3079 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3080 Action(E.getKey(), E.getValue()); 3081 } 3082 3083 void CGOpenMPRuntime::createOffloadEntry( 3084 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3085 llvm::GlobalValue::LinkageTypes Linkage) { 3086 StringRef Name = Addr->getName(); 3087 llvm::Module &M = CGM.getModule(); 3088 llvm::LLVMContext &C = M.getContext(); 3089 3090 // Create constant string with the name. 
3091 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3092 3093 std::string StringName = getName({"omp_offloading", "entry_name"}); 3094 auto *Str = new llvm::GlobalVariable( 3095 M, StrPtrInit->getType(), /*isConstant=*/true, 3096 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3097 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3098 3099 llvm::Constant *Data[] = { 3100 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3101 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3102 llvm::ConstantInt::get(CGM.SizeTy, Size), 3103 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3104 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3105 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3106 llvm::GlobalVariable *Entry = createGlobalStruct( 3107 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3108 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3109 3110 // The entry has to be created in the section the linker expects it to be. 3111 Entry->setSection("omp_offloading_entries"); 3112 } 3113 3114 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3115 // Emit the offloading entries and metadata so that the device codegen side 3116 // can easily figure out what to emit. The produced metadata looks like 3117 // this: 3118 // 3119 // !omp_offload.info = !{!1, ...} 3120 // 3121 // Right now we only generate metadata for function that contain target 3122 // regions. 3123 3124 // If we are in simd mode or there are no entries, we don't need to do 3125 // anything. 
3126 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3127 return; 3128 3129 llvm::Module &M = CGM.getModule(); 3130 llvm::LLVMContext &C = M.getContext(); 3131 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3132 SourceLocation, StringRef>, 3133 16> 3134 OrderedEntries(OffloadEntriesInfoManager.size()); 3135 llvm::SmallVector<StringRef, 16> ParentFunctions( 3136 OffloadEntriesInfoManager.size()); 3137 3138 // Auxiliary methods to create metadata values and strings. 3139 auto &&GetMDInt = [this](unsigned V) { 3140 return llvm::ConstantAsMetadata::get( 3141 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3142 }; 3143 3144 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3145 3146 // Create the offloading info metadata node. 3147 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3148 3149 // Create function that emits metadata for each target region entry; 3150 auto &&TargetRegionMetadataEmitter = 3151 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3152 &GetMDString]( 3153 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3154 unsigned Line, 3155 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3156 // Generate metadata for target regions. Each entry of this metadata 3157 // contains: 3158 // - Entry 0 -> Kind of this type of metadata (0). 3159 // - Entry 1 -> Device ID of the file where the entry was identified. 3160 // - Entry 2 -> File ID of the file where the entry was identified. 3161 // - Entry 3 -> Mangled name of the function where the entry was 3162 // identified. 3163 // - Entry 4 -> Line in the file where the entry was identified. 3164 // - Entry 5 -> Order the entry was created. 3165 // The first element of the metadata node is the kind. 
3166 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3167 GetMDInt(FileID), GetMDString(ParentName), 3168 GetMDInt(Line), GetMDInt(E.getOrder())}; 3169 3170 SourceLocation Loc; 3171 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3172 E = CGM.getContext().getSourceManager().fileinfo_end(); 3173 I != E; ++I) { 3174 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3175 I->getFirst()->getUniqueID().getFile() == FileID) { 3176 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3177 I->getFirst(), Line, 1); 3178 break; 3179 } 3180 } 3181 // Save this entry in the right position of the ordered entries array. 3182 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3183 ParentFunctions[E.getOrder()] = ParentName; 3184 3185 // Add metadata to the named metadata node. 3186 MD->addOperand(llvm::MDNode::get(C, Ops)); 3187 }; 3188 3189 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3190 TargetRegionMetadataEmitter); 3191 3192 // Create function that emits metadata for each device global variable entry; 3193 auto &&DeviceGlobalVarMetadataEmitter = 3194 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3195 MD](StringRef MangledName, 3196 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3197 &E) { 3198 // Generate metadata for global variables. Each entry of this metadata 3199 // contains: 3200 // - Entry 0 -> Kind of this type of metadata (1). 3201 // - Entry 1 -> Mangled name of the variable. 3202 // - Entry 2 -> Declare target kind. 3203 // - Entry 3 -> Order the entry was created. 3204 // The first element of the metadata node is the kind. 3205 llvm::Metadata *Ops[] = { 3206 GetMDInt(E.getKind()), GetMDString(MangledName), 3207 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3208 3209 // Save this entry in the right position of the ordered entries array. 
3210 OrderedEntries[E.getOrder()] = 3211 std::make_tuple(&E, SourceLocation(), MangledName); 3212 3213 // Add metadata to the named metadata node. 3214 MD->addOperand(llvm::MDNode::get(C, Ops)); 3215 }; 3216 3217 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3218 DeviceGlobalVarMetadataEmitter); 3219 3220 for (const auto &E : OrderedEntries) { 3221 assert(std::get<0>(E) && "All ordered entries must exist!"); 3222 if (const auto *CE = 3223 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3224 std::get<0>(E))) { 3225 if (!CE->getID() || !CE->getAddress()) { 3226 // Do not blame the entry if the parent funtion is not emitted. 3227 StringRef FnName = ParentFunctions[CE->getOrder()]; 3228 if (!CGM.GetGlobalValue(FnName)) 3229 continue; 3230 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3231 DiagnosticsEngine::Error, 3232 "Offloading entry for target region in %0 is incorrect: either the " 3233 "address or the ID is invalid."); 3234 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3235 continue; 3236 } 3237 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3238 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3239 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3240 OffloadEntryInfoDeviceGlobalVar>( 3241 std::get<0>(E))) { 3242 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3243 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3244 CE->getFlags()); 3245 switch (Flags) { 3246 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3247 if (CGM.getLangOpts().OpenMPIsDevice && 3248 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3249 continue; 3250 if (!CE->getAddress()) { 3251 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3252 DiagnosticsEngine::Error, "Offloading entry for declare target " 3253 "variable %0 is incorrect: the " 3254 "address is invalid."); 3255 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3256 
continue; 3257 } 3258 // The vaiable has no definition - no need to add the entry. 3259 if (CE->getVarSize().isZero()) 3260 continue; 3261 break; 3262 } 3263 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3264 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3265 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3266 "Declaret target link address is set."); 3267 if (CGM.getLangOpts().OpenMPIsDevice) 3268 continue; 3269 if (!CE->getAddress()) { 3270 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3271 DiagnosticsEngine::Error, 3272 "Offloading entry for declare target variable is incorrect: the " 3273 "address is invalid."); 3274 CGM.getDiags().Report(DiagID); 3275 continue; 3276 } 3277 break; 3278 } 3279 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3280 CE->getVarSize().getQuantity(), Flags, 3281 CE->getLinkage()); 3282 } else { 3283 llvm_unreachable("Unsupported entry kind."); 3284 } 3285 } 3286 } 3287 3288 /// Loads all the offload entries information from the host IR 3289 /// metadata. 3290 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3291 // If we are in target mode, load the metadata from the host IR. This code has 3292 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3293 3294 if (!CGM.getLangOpts().OpenMPIsDevice) 3295 return; 3296 3297 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3298 return; 3299 3300 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3301 if (auto EC = Buf.getError()) { 3302 CGM.getDiags().Report(diag::err_cannot_open_file) 3303 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3304 return; 3305 } 3306 3307 llvm::LLVMContext C; 3308 auto ME = expectedToErrorOrAndEmitErrors( 3309 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3310 3311 if (auto EC = ME.getError()) { 3312 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3313 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3314 CGM.getDiags().Report(DiagID) 3315 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3316 return; 3317 } 3318 3319 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3320 if (!MD) 3321 return; 3322 3323 for (llvm::MDNode *MN : MD->operands()) { 3324 auto &&GetMDInt = [MN](unsigned Idx) { 3325 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3326 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3327 }; 3328 3329 auto &&GetMDString = [MN](unsigned Idx) { 3330 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3331 return V->getString(); 3332 }; 3333 3334 switch (GetMDInt(0)) { 3335 default: 3336 llvm_unreachable("Unexpected metadata!"); 3337 break; 3338 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3339 OffloadingEntryInfoTargetRegion: 3340 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3341 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3342 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3343 /*Order=*/GetMDInt(5)); 3344 break; 3345 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3346 OffloadingEntryInfoDeviceGlobalVar: 3347 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3348 /*MangledName=*/GetMDString(1), 3349 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3350 /*Flags=*/GetMDInt(2)), 3351 /*Order=*/GetMDInt(3)); 3352 break; 3353 } 3354 } 3355 } 3356 3357 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3358 if (!KmpRoutineEntryPtrTy) { 3359 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3360 ASTContext &C = CGM.getContext(); 3361 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3362 FunctionProtoType::ExtProtoInfo EPI; 3363 KmpRoutineEntryPtrQTy = C.getPointerType( 3364 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3365 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3366 } 3367 } 3368 3369 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3370 // Make sure the type of the entry is already created. This is the type we 3371 // have to create: 3372 // struct __tgt_offload_entry{ 3373 // void *addr; // Pointer to the offload entry info. 3374 // // (function or global) 3375 // char *name; // Name of the function or global. 3376 // size_t size; // Size of the entry info (0 if it a function). 3377 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3378 // int32_t reserved; // Reserved, to use by the runtime library. 
3379 // }; 3380 if (TgtOffloadEntryQTy.isNull()) { 3381 ASTContext &C = CGM.getContext(); 3382 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3383 RD->startDefinition(); 3384 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3385 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3386 addFieldToRecordDecl(C, RD, C.getSizeType()); 3387 addFieldToRecordDecl( 3388 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3389 addFieldToRecordDecl( 3390 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3391 RD->completeDefinition(); 3392 RD->addAttr(PackedAttr::CreateImplicit(C)); 3393 TgtOffloadEntryQTy = C.getRecordType(RD); 3394 } 3395 return TgtOffloadEntryQTy; 3396 } 3397 3398 namespace { 3399 struct PrivateHelpersTy { 3400 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3401 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3402 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3403 PrivateElemInit(PrivateElemInit) {} 3404 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3405 const Expr *OriginalRef = nullptr; 3406 const VarDecl *Original = nullptr; 3407 const VarDecl *PrivateCopy = nullptr; 3408 const VarDecl *PrivateElemInit = nullptr; 3409 bool isLocalPrivate() const { 3410 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3411 } 3412 }; 3413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3414 } // anonymous namespace 3415 3416 static bool isAllocatableDecl(const VarDecl *VD) { 3417 const VarDecl *CVD = VD->getCanonicalDecl(); 3418 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3419 return false; 3420 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3421 // Use the default allocation. 
3422 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3423 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3424 !AA->getAllocator()); 3425 } 3426 3427 static RecordDecl * 3428 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3429 if (!Privates.empty()) { 3430 ASTContext &C = CGM.getContext(); 3431 // Build struct .kmp_privates_t. { 3432 // /* private vars */ 3433 // }; 3434 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3435 RD->startDefinition(); 3436 for (const auto &Pair : Privates) { 3437 const VarDecl *VD = Pair.second.Original; 3438 QualType Type = VD->getType().getNonReferenceType(); 3439 // If the private variable is a local variable with lvalue ref type, 3440 // allocate the pointer instead of the pointee type. 3441 if (Pair.second.isLocalPrivate()) { 3442 if (VD->getType()->isLValueReferenceType()) 3443 Type = C.getPointerType(Type); 3444 if (isAllocatableDecl(VD)) 3445 Type = C.getPointerType(Type); 3446 } 3447 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3448 if (VD->hasAttrs()) { 3449 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3450 E(VD->getAttrs().end()); 3451 I != E; ++I) 3452 FD->addAttr(*I); 3453 } 3454 } 3455 RD->completeDefinition(); 3456 return RD; 3457 } 3458 return nullptr; 3459 } 3460 3461 static RecordDecl * 3462 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3463 QualType KmpInt32Ty, 3464 QualType KmpRoutineEntryPointerQTy) { 3465 ASTContext &C = CGM.getContext(); 3466 // Build struct kmp_task_t { 3467 // void * shareds; 3468 // kmp_routine_entry_t routine; 3469 // kmp_int32 part_id; 3470 // kmp_cmplrdata_t data1; 3471 // kmp_cmplrdata_t data2; 3472 // For taskloops additional fields: 3473 // kmp_uint64 lb; 3474 // kmp_uint64 ub; 3475 // kmp_int64 st; 3476 // kmp_int32 liter; 3477 // void * reductions; 3478 // }; 3479 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3480 
UD->startDefinition(); 3481 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3482 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3483 UD->completeDefinition(); 3484 QualType KmpCmplrdataTy = C.getRecordType(UD); 3485 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3486 RD->startDefinition(); 3487 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3488 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3489 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3490 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3491 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3492 if (isOpenMPTaskLoopDirective(Kind)) { 3493 QualType KmpUInt64Ty = 3494 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3495 QualType KmpInt64Ty = 3496 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3497 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3498 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3499 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3500 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3501 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3502 } 3503 RD->completeDefinition(); 3504 return RD; 3505 } 3506 3507 static RecordDecl * 3508 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3509 ArrayRef<PrivateDataTy> Privates) { 3510 ASTContext &C = CGM.getContext(); 3511 // Build struct kmp_task_t_with_privates { 3512 // kmp_task_t task_data; 3513 // .kmp_privates_t. privates; 3514 // }; 3515 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3516 RD->startDefinition(); 3517 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3518 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3519 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3520 RD->completeDefinition(); 3521 return RD; 3522 } 3523 3524 /// Emit a proxy function which accepts kmp_task_t as the second 3525 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field.
/// \return the newly created internal-linkage function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates object; Base is its first
  // field (the kmp_task_t part).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the caller-provided type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the optional second field of the record; pass null
  // when it is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is the task
  // object itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions by value.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function (kmp_int32 gtid, kmp_task_t_with_privates *tt)
/// that pushes a destroy cleanup for every field of the privates record
/// whose type is a destructed type.
/// NOTE(review): the second field of the record is dereferenced without a
/// check, so this presumably must only be called when the task record has a
/// privates field - confirm against callers.
/// \return the newly created internal-linkage function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and rebase on it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1> 3692 /// **noalias priv1,..., <tyn> **noalias privn) { 3693 /// *priv1 = &.privates.priv1; 3694 /// ...; 3695 /// *privn = &.privates.privn; 3696 /// } 3697 /// \endcode 3698 static llvm::Value * 3699 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3700 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3701 ArrayRef<PrivateDataTy> Privates) { 3702 ASTContext &C = CGM.getContext(); 3703 FunctionArgList Args; 3704 ImplicitParamDecl TaskPrivatesArg( 3705 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3706 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3707 ImplicitParamDecl::Other); 3708 Args.push_back(&TaskPrivatesArg); 3709 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3710 unsigned Counter = 1; 3711 for (const Expr *E : Data.PrivateVars) { 3712 Args.push_back(ImplicitParamDecl::Create( 3713 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3714 C.getPointerType(C.getPointerType(E->getType())) 3715 .withConst() 3716 .withRestrict(), 3717 ImplicitParamDecl::Other)); 3718 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3719 PrivateVarsPos[VD] = Counter; 3720 ++Counter; 3721 } 3722 for (const Expr *E : Data.FirstprivateVars) { 3723 Args.push_back(ImplicitParamDecl::Create( 3724 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3725 C.getPointerType(C.getPointerType(E->getType())) 3726 .withConst() 3727 .withRestrict(), 3728 ImplicitParamDecl::Other)); 3729 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3730 PrivateVarsPos[VD] = Counter; 3731 ++Counter; 3732 } 3733 for (const Expr *E : Data.LastprivateVars) { 3734 Args.push_back(ImplicitParamDecl::Create( 3735 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3736 C.getPointerType(C.getPointerType(E->getType())) 3737 .withConst() 3738 .withRestrict(), 3739 ImplicitParamDecl::Other)); 3740 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3741 PrivateVarsPos[VD] = Counter; 3742 ++Counter; 3743 } 3744 for (const VarDecl *VD : 
Data.PrivateLocals) { 3745 QualType Ty = VD->getType().getNonReferenceType(); 3746 if (VD->getType()->isLValueReferenceType()) 3747 Ty = C.getPointerType(Ty); 3748 if (isAllocatableDecl(VD)) 3749 Ty = C.getPointerType(Ty); 3750 Args.push_back(ImplicitParamDecl::Create( 3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3752 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3753 ImplicitParamDecl::Other)); 3754 PrivateVarsPos[VD] = Counter; 3755 ++Counter; 3756 } 3757 const auto &TaskPrivatesMapFnInfo = 3758 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3759 llvm::FunctionType *TaskPrivatesMapTy = 3760 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3761 std::string Name = 3762 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3763 auto *TaskPrivatesMap = llvm::Function::Create( 3764 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3765 &CGM.getModule()); 3766 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3767 TaskPrivatesMapFnInfo); 3768 if (CGM.getLangOpts().Optimize) { 3769 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3770 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3771 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3772 } 3773 CodeGenFunction CGF(CGM); 3774 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3775 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3776 3777 // *privi = &.privates.privi; 3778 LValue Base = CGF.EmitLoadOfPointerLValue( 3779 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3780 TaskPrivatesArg.getType()->castAs<PointerType>()); 3781 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3782 Counter = 0; 3783 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3784 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3785 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3786 LValue RefLVal = 3787 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3788 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3789 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3790 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3791 ++Counter; 3792 } 3793 CGF.FinishFunction(); 3794 return TaskPrivatesMap; 3795 } 3796 3797 /// Emit initialization for private variables in task-based directives. 3798 static void emitPrivatesInit(CodeGenFunction &CGF, 3799 const OMPExecutableDirective &D, 3800 Address KmpTaskSharedsPtr, LValue TDBase, 3801 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3802 QualType SharedsTy, QualType SharedsPtrTy, 3803 const OMPTaskDataTy &Data, 3804 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3805 ASTContext &C = CGF.getContext(); 3806 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3807 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3808 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3809 ? OMPD_taskloop 3810 : OMPD_task; 3811 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3812 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3813 LValue SrcBase; 3814 bool IsTargetTask = 3815 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3816 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3817 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3818 // PointersArray, SizesArray, and MappersArray. The original variables for 3819 // these arrays are not captured and we get their addresses explicitly. 3820 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3821 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3822 SrcBase = CGF.MakeAddrLValue( 3823 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3824 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3825 SharedsTy); 3826 } 3827 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3828 for (const PrivateDataTy &Pair : Privates) { 3829 // Do not initialize private locals. 
3830 if (Pair.second.isLocalPrivate()) { 3831 ++FI; 3832 continue; 3833 } 3834 const VarDecl *VD = Pair.second.PrivateCopy; 3835 const Expr *Init = VD->getAnyInitializer(); 3836 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3837 !CGF.isTrivialInitializer(Init)))) { 3838 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3839 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3840 const VarDecl *OriginalVD = Pair.second.Original; 3841 // Check if the variable is the target-based BasePointersArray, 3842 // PointersArray, SizesArray, or MappersArray. 3843 LValue SharedRefLValue; 3844 QualType Type = PrivateLValue.getType(); 3845 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3846 if (IsTargetTask && !SharedField) { 3847 assert(isa<ImplicitParamDecl>(OriginalVD) && 3848 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3849 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3850 ->getNumParams() == 0 && 3851 isa<TranslationUnitDecl>( 3852 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3853 ->getDeclContext()) && 3854 "Expected artificial target data variable."); 3855 SharedRefLValue = 3856 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3857 } else if (ForDup) { 3858 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3859 SharedRefLValue = CGF.MakeAddrLValue( 3860 Address(SharedRefLValue.getPointer(CGF), 3861 C.getDeclAlign(OriginalVD)), 3862 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3863 SharedRefLValue.getTBAAInfo()); 3864 } else if (CGF.LambdaCaptureFields.count( 3865 Pair.second.Original->getCanonicalDecl()) > 0 || 3866 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3867 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3868 } else { 3869 // Processing for implicitly captured variables. 
3870 InlinedOpenMPRegionRAII Region( 3871 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3872 /*HasCancel=*/false, /*NoInheritance=*/true); 3873 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3874 } 3875 if (Type->isArrayType()) { 3876 // Initialize firstprivate array. 3877 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3878 // Perform simple memcpy. 3879 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3880 } else { 3881 // Initialize firstprivate array using element-by-element 3882 // initialization. 3883 CGF.EmitOMPAggregateAssign( 3884 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3885 Type, 3886 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3887 Address SrcElement) { 3888 // Clean up any temporaries needed by the initialization. 3889 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3890 InitScope.addPrivate( 3891 Elem, [SrcElement]() -> Address { return SrcElement; }); 3892 (void)InitScope.Privatize(); 3893 // Emit initialization for single element. 3894 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3895 CGF, &CapturesInfo); 3896 CGF.EmitAnyExprToMem(Init, DestElement, 3897 Init->getType().getQualifiers(), 3898 /*IsInitializer=*/false); 3899 }); 3900 } 3901 } else { 3902 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3903 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3904 return SharedRefLValue.getAddress(CGF); 3905 }); 3906 (void)InitScope.Privatize(); 3907 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3908 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3909 /*capturedByInit=*/false); 3910 } 3911 } else { 3912 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3913 } 3914 } 3915 ++FI; 3916 } 3917 } 3918 3919 /// Check if duplication function is required for taskloops. 
3920 static bool checkInitIsRequired(CodeGenFunction &CGF, 3921 ArrayRef<PrivateDataTy> Privates) { 3922 bool InitRequired = false; 3923 for (const PrivateDataTy &Pair : Privates) { 3924 if (Pair.second.isLocalPrivate()) 3925 continue; 3926 const VarDecl *VD = Pair.second.PrivateCopy; 3927 const Expr *Init = VD->getAnyInitializer(); 3928 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3929 !CGF.isTrivialInitializer(Init)); 3930 if (InitRequired) 3931 break; 3932 } 3933 return InitRequired; 3934 } 3935 3936 3937 /// Emit task_dup function (for initialization of 3938 /// private/firstprivate/lastprivate vars and last_iter flag) 3939 /// \code 3940 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3941 /// lastpriv) { 3942 /// // setup lastprivate flag 3943 /// task_dst->last = lastpriv; 3944 /// // could be constructor calls here... 3945 /// } 3946 /// \endcode 3947 static llvm::Value * 3948 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3949 const OMPExecutableDirective &D, 3950 QualType KmpTaskTWithPrivatesPtrQTy, 3951 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3952 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3953 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3954 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3955 ASTContext &C = CGM.getContext(); 3956 FunctionArgList Args; 3957 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3958 KmpTaskTWithPrivatesPtrQTy, 3959 ImplicitParamDecl::Other); 3960 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3961 KmpTaskTWithPrivatesPtrQTy, 3962 ImplicitParamDecl::Other); 3963 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3964 ImplicitParamDecl::Other); 3965 Args.push_back(&DstArg); 3966 Args.push_back(&SrcArg); 3967 Args.push_back(&LastprivArg); 3968 const auto &TaskDupFnInfo = 3969 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3970 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3971 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3972 auto *TaskDup = llvm::Function::Create( 3973 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3974 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3975 TaskDup->setDoesNotRecurse(); 3976 CodeGenFunction CGF(CGM); 3977 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3978 Loc); 3979 3980 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3981 CGF.GetAddrOfLocalVar(&DstArg), 3982 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3983 // task_dst->liter = lastpriv; 3984 if (WithLastIter) { 3985 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3986 LValue Base = CGF.EmitLValueForField( 3987 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3988 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3989 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3990 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3991 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3992 } 3993 3994 // Emit initial values for private copies (if any). 
3995 assert(!Privates.empty()); 3996 Address KmpTaskSharedsPtr = Address::invalid(); 3997 if (!Data.FirstprivateVars.empty()) { 3998 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3999 CGF.GetAddrOfLocalVar(&SrcArg), 4000 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4001 LValue Base = CGF.EmitLValueForField( 4002 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4003 KmpTaskSharedsPtr = Address( 4004 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4005 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4006 KmpTaskTShareds)), 4007 Loc), 4008 CGM.getNaturalTypeAlignment(SharedsTy)); 4009 } 4010 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4011 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4012 CGF.FinishFunction(); 4013 return TaskDup; 4014 } 4015 4016 /// Checks if destructor function is required to be generated. 4017 /// \return true if cleanups are required, false otherwise. 4018 static bool 4019 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4020 ArrayRef<PrivateDataTy> Privates) { 4021 for (const PrivateDataTy &P : Privates) { 4022 if (P.second.isLocalPrivate()) 4023 continue; 4024 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4025 if (Ty.isDestructedType()) 4026 return true; 4027 } 4028 return false; 4029 } 4030 4031 namespace { 4032 /// Loop generator for OpenMP iterator expression. 
/// The constructor privatizes every iterator variable and its counter and
/// opens one counted loop per iterator; the destructor emits the counter
/// increments, the back-branches and the exit blocks in reverse order, so
/// code emitted while the scope object is alive ends up inside the loop nest.
/// With a null iterator expression both the constructor and the destructor
/// emit nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations of the "iter.cont" (loop header) and
  // "iter.exit" blocks, indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front and register private storage for
    // each iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loops, iterator 0 first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops in reverse order of their creation.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of iterator 0 is emitted as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size of the storage designated by \p E.
/// - For an array shaping expression the address is the value of the base
///   pointer and the size is the pointee type size multiplied by every
///   dimension.
/// - For an array section the size is the distance between the address of
///   \p E and one element past the address emitted for the section with
///   IsLowerBound=false.
/// - Otherwise the size is the size of the type of \p E.
/// \return pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions are converted to size_t before being multiplied in.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    // Step one element past the upper address so the subtraction below
    // covers the upper element as well.
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record, if it is not built yet. The
/// record has three fields, in this order: an intptr-typed field, a
/// size_t-typed field and a 32-bit unsigned flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the allocation and initialization of a task descriptor for \p D:
/// collects and sorts the privates, builds the kmp_task_t based record types,
/// emits the proxy task entry and the runtime allocation call, handles the
/// 'detach' and 'affinity' clauses, copies the shareds, initializes the
/// private copies (emitting a task_dup function for taskloops when needed)
/// and stores the destructors thunk and the priority into the descriptor.
/// \return the emitted values describing the new task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // The *Vars and *Copies lists are walked in lock-step.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable local privates are recorded with pointer alignment instead
  // of the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the sort is stable, so entries with equal
  // alignment keep their insertion order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the remaining directives use separately cached
  // record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates map is passed using the type of the fourth parameter of the
  // outlined task function; it is a null pointer when there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' flag is selected at run time when Data.Final carries a
  // value, otherwise it is folded into a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used;
  // it takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the list items of clauses without an iterator
    // modifier; NumOfElements is the run-time product of the iterator upper
    // bounds and stays null when no clause has an iterator modifier.
    // NOTE(review): the run-time count does not take the number of list
    // items of iterator-modified clauses into account, and the bounds of
    // separate clauses are multiplied together, while the fill loop below
    // stores one entry per list item per iteration - confirm that the array
    // is large enough in those cases.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Run-time sized case: emit a variable-length array whose size
      // expression is bound to the computed element count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Pos is a compile-time index for these entries.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are indexed by a run-time
    // counter that starts right after the statically placed entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object opens the iterator loops here and closes them at
      // the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++counter;
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the "task with privates" record; TDBase is
  // its first field.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task_dup function is generated only for taskloops, and only when
    // there are lastprivates or a private copy needs non-trivial
    // construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// Only 'in', 'out', 'inout' and 'mutexinoutset' are expected here; every
/// other kind is treated as unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The flags type is an unsigned integer with the bit width of 'bool'; the
/// record has an intptr-typed field, a size_t-typed field and a flags field,
/// in this order.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the pointer stored in the depobj variable \p DepobjLVal, treats it
/// as a pointer to kmp_depend_info elements and reads the number of
/// dependencies from the base_addr field of the element at index -1.
/// \return pair (number of dependencies, lvalue of the element the depobj
/// pointer refers to).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element: the element in front of the array holds the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits the stores that fill one kmp_depend_info element of
/// \p DependenciesArray per expression in \p Data. \p Pos holds either a
/// pointer to a compile-time index or a pointer to an lvalue with a run-time
/// index; whichever is used is advanced by one after each element. If
/// \p Data has an iterator expression, the stores are emitted inside the
/// corresponding iterator loops.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: at compile time for the static index, with an
    // emitted load/add/store for the run-time index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a 'depobj' dependence, emits code that reads the number of elements
/// of every depobj listed in \p Data (inside the iterator loops, if any) and
/// adds it to a per-expression temporary.
/// \return the values loaded from those temporaries after the loops, one per
/// dependence expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The extra block closes the iterator loops before the sizes are
    // loaded below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element at index -1 stores the number of dependencies.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // NOTE(review): presumably InitTempAlloca places the zero store next to
      // the alloca rather than at the current insertion point, so that the
      // load/add/store below accumulates across iterator iterations -
      // confirm.
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// For a 'depobj' dependence, emits code that copies the kmp_depend_info
/// elements of every depobj listed in \p Data into \p DependenciesArray at
/// the run-time position stored in \p PosLVal and advances that position by
/// the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy dependency data.
      // Byte count = element size * number of elements.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4824 }); 4825 QualType FlagsTy; 4826 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4827 bool HasDepobjDeps = false; 4828 bool HasRegularWithIterators = false; 4829 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4830 llvm::Value *NumOfRegularWithIterators = 4831 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4832 // Calculate number of depobj dependecies and regular deps with the iterators. 4833 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4834 if (D.DepKind == OMPC_DEPEND_depobj) { 4835 SmallVector<llvm::Value *, 4> Sizes = 4836 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4837 for (llvm::Value *Size : Sizes) { 4838 NumOfDepobjElements = 4839 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4840 } 4841 HasDepobjDeps = true; 4842 continue; 4843 } 4844 // Include number of iterations, if any. 4845 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4846 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4847 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4848 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4849 NumOfRegularWithIterators = 4850 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4851 } 4852 HasRegularWithIterators = true; 4853 continue; 4854 } 4855 } 4856 4857 QualType KmpDependInfoArrayTy; 4858 if (HasDepobjDeps || HasRegularWithIterators) { 4859 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4860 /*isSigned=*/false); 4861 if (HasDepobjDeps) { 4862 NumOfElements = 4863 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4864 } 4865 if (HasRegularWithIterators) { 4866 NumOfElements = 4867 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4868 } 4869 OpaqueValueExpr OVE(Loc, 4870 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4871 VK_RValue); 4872 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4873 RValue::get(NumOfElements)); 4874 
KmpDependInfoArrayTy = 4875 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4876 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4877 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4878 // Properly emit variable-sized array. 4879 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4880 ImplicitParamDecl::Other); 4881 CGF.EmitVarDecl(*PD); 4882 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4883 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4884 /*isSigned=*/false); 4885 } else { 4886 KmpDependInfoArrayTy = C.getConstantArrayType( 4887 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4888 ArrayType::Normal, /*IndexTypeQuals=*/0); 4889 DependenciesArray = 4890 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4891 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4892 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4893 /*isSigned=*/false); 4894 } 4895 unsigned Pos = 0; 4896 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4897 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4898 Dependencies[I].IteratorExpr) 4899 continue; 4900 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4901 DependenciesArray); 4902 } 4903 // Copy regular dependecies with iterators. 4904 LValue PosLVal = CGF.MakeAddrLValue( 4905 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4906 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4907 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4908 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4909 !Dependencies[I].IteratorExpr) 4910 continue; 4911 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4912 DependenciesArray); 4913 } 4914 // Copy final depobj arrays without iterators. 
4915 if (HasDepobjDeps) { 4916 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4917 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4918 continue; 4919 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4920 DependenciesArray); 4921 } 4922 } 4923 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4924 DependenciesArray, CGF.VoidPtrTy); 4925 return std::make_pair(NumOfElements, DependenciesArray); 4926 } 4927 4928 Address CGOpenMPRuntime::emitDepobjDependClause( 4929 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4930 SourceLocation Loc) { 4931 if (Dependencies.DepExprs.empty()) 4932 return Address::invalid(); 4933 // Process list of dependencies. 4934 ASTContext &C = CGM.getContext(); 4935 Address DependenciesArray = Address::invalid(); 4936 unsigned NumDependencies = Dependencies.DepExprs.size(); 4937 QualType FlagsTy; 4938 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4939 RecordDecl *KmpDependInfoRD = 4940 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4941 4942 llvm::Value *Size; 4943 // Define type kmp_depend_info[<Dependencies.size()>]; 4944 // For depobj reserve one extra element to store the number of elements. 4945 // It is required to handle depobj(x) update(in) construct. 
4946 // kmp_depend_info[<Dependencies.size()>] deps; 4947 llvm::Value *NumDepsVal; 4948 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4949 if (const auto *IE = 4950 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4951 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4952 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4953 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4954 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4955 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4956 } 4957 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4958 NumDepsVal); 4959 CharUnits SizeInBytes = 4960 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4961 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4962 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4963 NumDepsVal = 4964 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4965 } else { 4966 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4967 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4968 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4969 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4970 Size = CGM.getSize(Sz.alignTo(Align)); 4971 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4972 } 4973 // Need to allocate on the dynamic memory. 4974 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4975 // Use default allocator. 
4976 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4977 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4978 4979 llvm::Value *Addr = 4980 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4981 CGM.getModule(), OMPRTL___kmpc_alloc), 4982 Args, ".dep.arr.addr"); 4983 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4984 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4985 DependenciesArray = Address(Addr, Align); 4986 // Write number of elements in the first element of array for depobj. 4987 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4988 // deps[i].base_addr = NumDependencies; 4989 LValue BaseAddrLVal = CGF.EmitLValueForField( 4990 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4991 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4992 llvm::PointerUnion<unsigned *, LValue *> Pos; 4993 unsigned Idx = 1; 4994 LValue PosLVal; 4995 if (Dependencies.IteratorExpr) { 4996 PosLVal = CGF.MakeAddrLValue( 4997 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4998 C.getSizeType()); 4999 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5000 /*IsInit=*/true); 5001 Pos = &PosLVal; 5002 } else { 5003 Pos = &Idx; 5004 } 5005 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5006 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5007 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5008 return DependenciesArray; 5009 } 5010 5011 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5012 SourceLocation Loc) { 5013 ASTContext &C = CGM.getContext(); 5014 QualType FlagsTy; 5015 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5016 LValue Base = CGF.EmitLoadOfPointerLValue( 5017 DepobjLVal.getAddress(CGF), 5018 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5019 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5020 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5021 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5022 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5023 Addr.getPointer(), 5024 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5025 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5026 CGF.VoidPtrTy); 5027 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5028 // Use default allocator. 5029 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5030 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5031 5032 // _kmpc_free(gtid, addr, nullptr); 5033 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5034 CGM.getModule(), OMPRTL___kmpc_free), 5035 Args); 5036 } 5037 5038 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5039 OpenMPDependClauseKind NewDepKind, 5040 SourceLocation Loc) { 5041 ASTContext &C = CGM.getContext(); 5042 QualType FlagsTy; 5043 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5044 RecordDecl *KmpDependInfoRD = 5045 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5046 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5047 llvm::Value *NumDeps; 5048 LValue Base; 5049 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5050 5051 Address Begin = Base.getAddress(CGF); 5052 // Cast from pointer to array type to pointer to single element. 5053 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5054 // The basic structure here is a while-do loop. 
5055 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5056 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5057 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5058 CGF.EmitBlock(BodyBB); 5059 llvm::PHINode *ElementPHI = 5060 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5061 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5062 Begin = Address(ElementPHI, Begin.getAlignment()); 5063 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5064 Base.getTBAAInfo()); 5065 // deps[i].flags = NewDepKind; 5066 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5067 LValue FlagsLVal = CGF.EmitLValueForField( 5068 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5069 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5070 FlagsLVal); 5071 5072 // Shift the address forward by one element. 5073 Address ElementNext = 5074 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5075 ElementPHI->addIncoming(ElementNext.getPointer(), 5076 CGF.Builder.GetInsertBlock()); 5077 llvm::Value *IsEmpty = 5078 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5079 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5080 // Done. 
5081 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5082 } 5083 5084 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5085 const OMPExecutableDirective &D, 5086 llvm::Function *TaskFunction, 5087 QualType SharedsTy, Address Shareds, 5088 const Expr *IfCond, 5089 const OMPTaskDataTy &Data) { 5090 if (!CGF.HaveInsertPoint()) 5091 return; 5092 5093 TaskResultTy Result = 5094 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5095 llvm::Value *NewTask = Result.NewTask; 5096 llvm::Function *TaskEntry = Result.TaskEntry; 5097 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5098 LValue TDBase = Result.TDBase; 5099 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5100 // Process list of dependences. 5101 Address DependenciesArray = Address::invalid(); 5102 llvm::Value *NumOfElements; 5103 std::tie(NumOfElements, DependenciesArray) = 5104 emitDependClause(CGF, Data.Dependences, Loc); 5105 5106 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5107 // libcall. 
5108 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5109 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5110 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5111 // list is not empty 5112 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5113 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5114 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5115 llvm::Value *DepTaskArgs[7]; 5116 if (!Data.Dependences.empty()) { 5117 DepTaskArgs[0] = UpLoc; 5118 DepTaskArgs[1] = ThreadID; 5119 DepTaskArgs[2] = NewTask; 5120 DepTaskArgs[3] = NumOfElements; 5121 DepTaskArgs[4] = DependenciesArray.getPointer(); 5122 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5123 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5124 } 5125 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5126 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5127 if (!Data.Tied) { 5128 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5129 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5130 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5131 } 5132 if (!Data.Dependences.empty()) { 5133 CGF.EmitRuntimeCall( 5134 OMPBuilder.getOrCreateRuntimeFunction( 5135 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5136 DepTaskArgs); 5137 } else { 5138 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5139 CGM.getModule(), OMPRTL___kmpc_omp_task), 5140 TaskArgs); 5141 } 5142 // Check if parent region is untied and build return for untied task; 5143 if (auto *Region = 5144 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5145 Region->emitUntiedSwitch(CGF); 5146 }; 5147 5148 llvm::Value *DepWaitTaskArgs[6]; 5149 if (!Data.Dependences.empty()) { 5150 DepWaitTaskArgs[0] = UpLoc; 5151 DepWaitTaskArgs[1] = ThreadID; 5152 DepWaitTaskArgs[2] = NumOfElements; 5153 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5154 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5155 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5156 } 5157 auto &M = CGM.getModule(); 5158 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5159 TaskEntry, &Data, &DepWaitTaskArgs, 5160 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5161 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5162 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5163 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5164 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5165 // is specified. 5166 if (!Data.Dependences.empty()) 5167 CGF.EmitRuntimeCall( 5168 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5169 DepWaitTaskArgs); 5170 // Call proxy_task_entry(gtid, new_task); 5171 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5172 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5173 Action.Enter(CGF); 5174 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5175 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5176 OutlinedFnArgs); 5177 }; 5178 5179 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5180 // kmp_task_t *new_task); 5181 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5182 // kmp_task_t *new_task); 5183 RegionCodeGenTy RCG(CodeGen); 5184 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5185 M, OMPRTL___kmpc_omp_task_begin_if0), 5186 TaskArgs, 5187 OMPBuilder.getOrCreateRuntimeFunction( 5188 M, OMPRTL___kmpc_omp_task_complete_if0), 5189 TaskArgs); 5190 RCG.setAction(Action); 5191 RCG(CGF); 5192 }; 5193 5194 if (IfCond) { 5195 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5196 } else { 5197 RegionCodeGenTy ThenRCG(ThenCodeGen); 5198 ThenRCG(CGF); 5199 } 5200 } 5201 5202 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5203 const OMPLoopDirective &D, 5204 llvm::Function *TaskFunction, 5205 
QualType SharedsTy, Address Shareds, 5206 const Expr *IfCond, 5207 const OMPTaskDataTy &Data) { 5208 if (!CGF.HaveInsertPoint()) 5209 return; 5210 TaskResultTy Result = 5211 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5212 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5213 // libcall. 5214 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5215 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5216 // sched, kmp_uint64 grainsize, void *task_dup); 5217 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5218 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5219 llvm::Value *IfVal; 5220 if (IfCond) { 5221 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5222 /*isSigned=*/true); 5223 } else { 5224 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5225 } 5226 5227 LValue LBLVal = CGF.EmitLValueForField( 5228 Result.TDBase, 5229 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5230 const auto *LBVar = 5231 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5232 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5233 LBLVal.getQuals(), 5234 /*IsInitializer=*/true); 5235 LValue UBLVal = CGF.EmitLValueForField( 5236 Result.TDBase, 5237 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5238 const auto *UBVar = 5239 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5240 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5241 UBLVal.getQuals(), 5242 /*IsInitializer=*/true); 5243 LValue StLVal = CGF.EmitLValueForField( 5244 Result.TDBase, 5245 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5246 const auto *StVar = 5247 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5248 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5249 StLVal.getQuals(), 5250 /*IsInitializer=*/true); 5251 // 
Store reductions address. 5252 LValue RedLVal = CGF.EmitLValueForField( 5253 Result.TDBase, 5254 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5255 if (Data.Reductions) { 5256 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5257 } else { 5258 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5259 CGF.getContext().VoidPtrTy); 5260 } 5261 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5262 llvm::Value *TaskArgs[] = { 5263 UpLoc, 5264 ThreadID, 5265 Result.NewTask, 5266 IfVal, 5267 LBLVal.getPointer(CGF), 5268 UBLVal.getPointer(CGF), 5269 CGF.EmitLoadOfScalar(StLVal, Loc), 5270 llvm::ConstantInt::getSigned( 5271 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5272 llvm::ConstantInt::getSigned( 5273 CGF.IntTy, Data.Schedule.getPointer() 5274 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5275 : NoSchedule), 5276 Data.Schedule.getPointer() 5277 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5278 /*isSigned=*/false) 5279 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5280 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5281 Result.TaskDupFn, CGF.VoidPtrTy) 5282 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5283 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5284 CGM.getModule(), OMPRTL___kmpc_taskloop), 5285 TaskArgs); 5286 } 5287 5288 /// Emit reduction operation for each element of array (required for 5289 /// array sections) LHS op = RHS. 5290 /// \param Type Type of array. 5291 /// \param LHSVar Variable on the left side of the reduction operation 5292 /// (references element of array in original variable). 5293 /// \param RHSVar Variable on the right side of the reduction operation 5294 /// (references element of array in original variable). 5295 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5296 /// RHSVar. 
5297 static void EmitOMPAggregateReduction( 5298 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5299 const VarDecl *RHSVar, 5300 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5301 const Expr *, const Expr *)> &RedOpGen, 5302 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5303 const Expr *UpExpr = nullptr) { 5304 // Perform element-by-element initialization. 5305 QualType ElementTy; 5306 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5307 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5308 5309 // Drill down to the base element type on both arrays. 5310 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5311 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5312 5313 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5314 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5315 // Cast from pointer to array type to pointer to single element. 5316 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5317 // The basic structure here is a while-do loop. 5318 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5319 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5320 llvm::Value *IsEmpty = 5321 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5322 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5323 5324 // Enter the loop body, making that address the current address. 
5325 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5326 CGF.EmitBlock(BodyBB); 5327 5328 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5329 5330 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5331 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5332 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5333 Address RHSElementCurrent = 5334 Address(RHSElementPHI, 5335 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5336 5337 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5338 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5339 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5340 Address LHSElementCurrent = 5341 Address(LHSElementPHI, 5342 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5343 5344 // Emit copy. 5345 CodeGenFunction::OMPPrivateScope Scope(CGF); 5346 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5347 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5348 Scope.Privatize(); 5349 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5350 Scope.ForceCleanup(); 5351 5352 // Shift the address forward by one element. 5353 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5354 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5355 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5356 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5357 // Check whether we've reached the end. 5358 llvm::Value *Done = 5359 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5360 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5361 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5362 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5363 5364 // Done. 5365 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5366 } 5367 5368 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5369 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5370 /// UDR combiner function. 5371 static void emitReductionCombiner(CodeGenFunction &CGF, 5372 const Expr *ReductionOp) { 5373 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5374 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5375 if (const auto *DRE = 5376 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5377 if (const auto *DRD = 5378 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5379 std::pair<llvm::Function *, llvm::Function *> Reduction = 5380 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5381 RValue Func = RValue::get(Reduction.first); 5382 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5383 CGF.EmitIgnoredExpr(ReductionOp); 5384 return; 5385 } 5386 CGF.EmitIgnoredExpr(ReductionOp); 5387 } 5388 5389 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5390 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5391 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5392 ArrayRef<const Expr *> ReductionOps) { 5393 ASTContext &C = CGM.getContext(); 5394 5395 // void reduction_func(void *LHSArg, void *RHSArg); 5396 FunctionArgList Args; 5397 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5398 ImplicitParamDecl::Other); 5399 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5400 ImplicitParamDecl::Other); 5401 Args.push_back(&LHSArg); 5402 Args.push_back(&RHSArg); 5403 const auto &CGFI = 5404 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5405 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5406 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5407 llvm::GlobalValue::InternalLinkage, Name, 5408 &CGM.getModule()); 5409 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5410 Fn->setDoesNotRecurse(); 
5411 CodeGenFunction CGF(CGM); 5412 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5413 5414 // Dst = (void*[n])(LHSArg); 5415 // Src = (void*[n])(RHSArg); 5416 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5417 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5418 ArgsType), CGF.getPointerAlign()); 5419 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5420 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5421 ArgsType), CGF.getPointerAlign()); 5422 5423 // ... 5424 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5425 // ... 5426 CodeGenFunction::OMPPrivateScope Scope(CGF); 5427 auto IPriv = Privates.begin(); 5428 unsigned Idx = 0; 5429 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5430 const auto *RHSVar = 5431 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5432 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5433 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5434 }); 5435 const auto *LHSVar = 5436 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5437 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5438 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5439 }); 5440 QualType PrivTy = (*IPriv)->getType(); 5441 if (PrivTy->isVariablyModifiedType()) { 5442 // Get array size and emit VLA type. 
5443 ++Idx; 5444 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5445 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5446 const VariableArrayType *VLA = 5447 CGF.getContext().getAsVariableArrayType(PrivTy); 5448 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5449 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5450 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5451 CGF.EmitVariablyModifiedType(PrivTy); 5452 } 5453 } 5454 Scope.Privatize(); 5455 IPriv = Privates.begin(); 5456 auto ILHS = LHSExprs.begin(); 5457 auto IRHS = RHSExprs.begin(); 5458 for (const Expr *E : ReductionOps) { 5459 if ((*IPriv)->getType()->isArrayType()) { 5460 // Emit reduction for array section. 5461 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5462 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5463 EmitOMPAggregateReduction( 5464 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5465 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5466 emitReductionCombiner(CGF, E); 5467 }); 5468 } else { 5469 // Emit reduction for array subscript or single variable. 5470 emitReductionCombiner(CGF, E); 5471 } 5472 ++IPriv; 5473 ++ILHS; 5474 ++IRHS; 5475 } 5476 Scope.ForceCleanup(); 5477 CGF.FinishFunction(); 5478 return Fn; 5479 } 5480 5481 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5482 const Expr *ReductionOp, 5483 const Expr *PrivateRef, 5484 const DeclRefExpr *LHS, 5485 const DeclRefExpr *RHS) { 5486 if (PrivateRef->getType()->isArrayType()) { 5487 // Emit reduction for array section. 
5488 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5489 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5490 EmitOMPAggregateReduction( 5491 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5492 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5493 emitReductionCombiner(CGF, ReductionOp); 5494 }); 5495 } else { 5496 // Emit reduction for array subscript or single variable. 5497 emitReductionCombiner(CGF, ReductionOp); 5498 } 5499 } 5500 5501 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5502 ArrayRef<const Expr *> Privates, 5503 ArrayRef<const Expr *> LHSExprs, 5504 ArrayRef<const Expr *> RHSExprs, 5505 ArrayRef<const Expr *> ReductionOps, 5506 ReductionOptionsTy Options) { 5507 if (!CGF.HaveInsertPoint()) 5508 return; 5509 5510 bool WithNowait = Options.WithNowait; 5511 bool SimpleReduction = Options.SimpleReduction; 5512 5513 // Next code should be emitted for reduction: 5514 // 5515 // static kmp_critical_name lock = { 0 }; 5516 // 5517 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5518 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5519 // ... 5520 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5521 // *(Type<n>-1*)rhs[<n>-1]); 5522 // } 5523 // 5524 // ... 5525 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5526 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5527 // RedList, reduce_func, &<lock>)) { 5528 // case 1: 5529 // ... 5530 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5531 // ... 5532 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5533 // break; 5534 // case 2: 5535 // ... 5536 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5537 // ... 5538 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5539 // break; 5540 // default:; 5541 // } 5542 // 5543 // if SimpleReduction is true, only the next code is generated: 5544 // ... 
5545 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5546 // ... 5547 5548 ASTContext &C = CGM.getContext(); 5549 5550 if (SimpleReduction) { 5551 CodeGenFunction::RunCleanupsScope Scope(CGF); 5552 auto IPriv = Privates.begin(); 5553 auto ILHS = LHSExprs.begin(); 5554 auto IRHS = RHSExprs.begin(); 5555 for (const Expr *E : ReductionOps) { 5556 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5557 cast<DeclRefExpr>(*IRHS)); 5558 ++IPriv; 5559 ++ILHS; 5560 ++IRHS; 5561 } 5562 return; 5563 } 5564 5565 // 1. Build a list of reduction variables. 5566 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5567 auto Size = RHSExprs.size(); 5568 for (const Expr *E : Privates) { 5569 if (E->getType()->isVariablyModifiedType()) 5570 // Reserve place for array size. 5571 ++Size; 5572 } 5573 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5574 QualType ReductionArrayTy = 5575 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5576 /*IndexTypeQuals=*/0); 5577 Address ReductionList = 5578 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5579 auto IPriv = Privates.begin(); 5580 unsigned Idx = 0; 5581 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5582 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5583 CGF.Builder.CreateStore( 5584 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5585 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5586 Elem); 5587 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5588 // Store array size. 5589 ++Idx; 5590 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5591 llvm::Value *Size = CGF.Builder.CreateIntCast( 5592 CGF.getVLASize( 5593 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5594 .NumElts, 5595 CGF.SizeTy, /*isSigned=*/false); 5596 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5597 Elem); 5598 } 5599 } 5600 5601 // 2. 
Emit reduce_func(). 5602 llvm::Function *ReductionFn = emitReductionFunction( 5603 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5604 LHSExprs, RHSExprs, ReductionOps); 5605 5606 // 3. Create static kmp_critical_name lock = { 0 }; 5607 std::string Name = getName({"reduction"}); 5608 llvm::Value *Lock = getCriticalRegionLock(Name); 5609 5610 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5611 // RedList, reduce_func, &<lock>); 5612 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5613 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5614 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5615 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5616 ReductionList.getPointer(), CGF.VoidPtrTy); 5617 llvm::Value *Args[] = { 5618 IdentTLoc, // ident_t *<loc> 5619 ThreadId, // i32 <gtid> 5620 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5621 ReductionArrayTySize, // size_type sizeof(RedList) 5622 RL, // void *RedList 5623 ReductionFn, // void (*) (void *, void *) <reduce_func> 5624 Lock // kmp_critical_name *&<lock> 5625 }; 5626 llvm::Value *Res = CGF.EmitRuntimeCall( 5627 OMPBuilder.getOrCreateRuntimeFunction( 5628 CGM.getModule(), 5629 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5630 Args); 5631 5632 // 5. Build switch(res) 5633 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5634 llvm::SwitchInst *SwInst = 5635 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5636 5637 // 6. Build case 1: 5638 // ... 5639 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5640 // ... 
5641 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5642 // break; 5643 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5644 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5645 CGF.EmitBlock(Case1BB); 5646 5647 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5648 llvm::Value *EndArgs[] = { 5649 IdentTLoc, // ident_t *<loc> 5650 ThreadId, // i32 <gtid> 5651 Lock // kmp_critical_name *&<lock> 5652 }; 5653 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5654 CodeGenFunction &CGF, PrePostActionTy &Action) { 5655 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5656 auto IPriv = Privates.begin(); 5657 auto ILHS = LHSExprs.begin(); 5658 auto IRHS = RHSExprs.begin(); 5659 for (const Expr *E : ReductionOps) { 5660 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5661 cast<DeclRefExpr>(*IRHS)); 5662 ++IPriv; 5663 ++ILHS; 5664 ++IRHS; 5665 } 5666 }; 5667 RegionCodeGenTy RCG(CodeGen); 5668 CommonActionTy Action( 5669 nullptr, llvm::None, 5670 OMPBuilder.getOrCreateRuntimeFunction( 5671 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5672 : OMPRTL___kmpc_end_reduce), 5673 EndArgs); 5674 RCG.setAction(Action); 5675 RCG(CGF); 5676 5677 CGF.EmitBranch(DefaultBB); 5678 5679 // 7. Build case 2: 5680 // ... 5681 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5682 // ... 
5683 // break; 5684 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5685 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5686 CGF.EmitBlock(Case2BB); 5687 5688 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5689 CodeGenFunction &CGF, PrePostActionTy &Action) { 5690 auto ILHS = LHSExprs.begin(); 5691 auto IRHS = RHSExprs.begin(); 5692 auto IPriv = Privates.begin(); 5693 for (const Expr *E : ReductionOps) { 5694 const Expr *XExpr = nullptr; 5695 const Expr *EExpr = nullptr; 5696 const Expr *UpExpr = nullptr; 5697 BinaryOperatorKind BO = BO_Comma; 5698 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5699 if (BO->getOpcode() == BO_Assign) { 5700 XExpr = BO->getLHS(); 5701 UpExpr = BO->getRHS(); 5702 } 5703 } 5704 // Try to emit update expression as a simple atomic. 5705 const Expr *RHSExpr = UpExpr; 5706 if (RHSExpr) { 5707 // Analyze RHS part of the whole expression. 5708 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5709 RHSExpr->IgnoreParenImpCasts())) { 5710 // If this is a conditional operator, analyze its condition for 5711 // min/max reduction operator. 
5712 RHSExpr = ACO->getCond(); 5713 } 5714 if (const auto *BORHS = 5715 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5716 EExpr = BORHS->getRHS(); 5717 BO = BORHS->getOpcode(); 5718 } 5719 } 5720 if (XExpr) { 5721 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5722 auto &&AtomicRedGen = [BO, VD, 5723 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5724 const Expr *EExpr, const Expr *UpExpr) { 5725 LValue X = CGF.EmitLValue(XExpr); 5726 RValue E; 5727 if (EExpr) 5728 E = CGF.EmitAnyExpr(EExpr); 5729 CGF.EmitOMPAtomicSimpleUpdateExpr( 5730 X, E, BO, /*IsXLHSInRHSPart=*/true, 5731 llvm::AtomicOrdering::Monotonic, Loc, 5732 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5733 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5734 PrivateScope.addPrivate( 5735 VD, [&CGF, VD, XRValue, Loc]() { 5736 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5737 CGF.emitOMPSimpleStore( 5738 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5739 VD->getType().getNonReferenceType(), Loc); 5740 return LHSTemp; 5741 }); 5742 (void)PrivateScope.Privatize(); 5743 return CGF.EmitAnyExpr(UpExpr); 5744 }); 5745 }; 5746 if ((*IPriv)->getType()->isArrayType()) { 5747 // Emit atomic reduction for array section. 5748 const auto *RHSVar = 5749 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5750 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5751 AtomicRedGen, XExpr, EExpr, UpExpr); 5752 } else { 5753 // Emit atomic reduction for array subscript or single variable. 5754 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5755 } 5756 } else { 5757 // Emit as a critical region. 
5758 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5759 const Expr *, const Expr *) { 5760 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5761 std::string Name = RT.getName({"atomic_reduction"}); 5762 RT.emitCriticalRegion( 5763 CGF, Name, 5764 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5765 Action.Enter(CGF); 5766 emitReductionCombiner(CGF, E); 5767 }, 5768 Loc); 5769 }; 5770 if ((*IPriv)->getType()->isArrayType()) { 5771 const auto *LHSVar = 5772 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5773 const auto *RHSVar = 5774 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5775 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5776 CritRedGen); 5777 } else { 5778 CritRedGen(CGF, nullptr, nullptr, nullptr); 5779 } 5780 } 5781 ++ILHS; 5782 ++IRHS; 5783 ++IPriv; 5784 } 5785 }; 5786 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5787 if (!WithNowait) { 5788 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5789 llvm::Value *EndArgs[] = { 5790 IdentTLoc, // ident_t *<loc> 5791 ThreadId, // i32 <gtid> 5792 Lock // kmp_critical_name *&<lock> 5793 }; 5794 CommonActionTy Action(nullptr, llvm::None, 5795 OMPBuilder.getOrCreateRuntimeFunction( 5796 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5797 EndArgs); 5798 AtomicRCG.setAction(Action); 5799 AtomicRCG(CGF); 5800 } else { 5801 AtomicRCG(CGF); 5802 } 5803 5804 CGF.EmitBranch(DefaultBB); 5805 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5806 } 5807 5808 /// Generates unique name for artificial threadprivate variables. 5809 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5810 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5811 const Expr *Ref) { 5812 SmallString<256> Buffer; 5813 llvm::raw_svector_ostream Out(Buffer); 5814 const clang::DeclRefExpr *DE; 5815 const VarDecl *D = ::getBaseDecl(Ref, DE); 5816 if (!D) 5817 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5818 D = D->getCanonicalDecl(); 5819 std::string Name = CGM.getOpenMPRuntime().getName( 5820 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5821 Out << Prefix << Name << "_" 5822 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5823 return std::string(Out.str()); 5824 } 5825 5826 /// Emits reduction initializer function: 5827 /// \code 5828 /// void @.red_init(void* %arg, void* %orig) { 5829 /// %0 = bitcast void* %arg to <type>* 5830 /// store <type> <init>, <type>* %0 5831 /// ret void 5832 /// } 5833 /// \endcode 5834 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5835 SourceLocation Loc, 5836 ReductionCodeGen &RCG, unsigned N) { 5837 ASTContext &C = CGM.getContext(); 5838 QualType VoidPtrTy = C.VoidPtrTy; 5839 VoidPtrTy.addRestrict(); 5840 FunctionArgList Args; 5841 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5842 ImplicitParamDecl::Other); 5843 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5844 ImplicitParamDecl::Other); 5845 Args.emplace_back(&Param); 5846 Args.emplace_back(&ParamOrig); 5847 const auto &FnInfo = 5848 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5849 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5850 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5851 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5852 Name, &CGM.getModule()); 5853 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5854 Fn->setDoesNotRecurse(); 5855 CodeGenFunction CGF(CGM); 5856 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5857 Address PrivateAddr = CGF.EmitLoadOfPointer( 5858 CGF.GetAddrOfLocalVar(&Param), 5859 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5860 llvm::Value *Size = nullptr; 5861 // If the size of the reduction item is non-constant, load it from global 5862 // threadprivate variable. 5863 if (RCG.getSizes(N).second) { 5864 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5865 CGF, CGM.getContext().getSizeType(), 5866 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5867 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5868 CGM.getContext().getSizeType(), Loc); 5869 } 5870 RCG.emitAggregateType(CGF, N, Size); 5871 LValue OrigLVal; 5872 // If initializer uses initializer from declare reduction construct, emit a 5873 // pointer to the address of the original reduction item (required by reduction 5874 // initializer) 5875 if (RCG.usesReductionInitializer(N)) { 5876 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5877 SharedAddr = CGF.EmitLoadOfPointer( 5878 SharedAddr, 5879 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5880 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5881 } else { 5882 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5883 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5884 CGM.getContext().VoidPtrTy); 5885 } 5886 // Emit the initializer: 5887 // %0 = bitcast void* %arg to <type>* 5888 // store <type> <init>, <type>* %0 5889 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5890 [](CodeGenFunction &) { return false; }); 5891 CGF.FinishFunction(); 5892 return Fn; 5893 } 5894 5895 /// Emits reduction combiner function: 5896 /// \code 5897 /// void @.red_comb(void* %arg0, void* %arg1) { 5898 /// %lhs = bitcast void* %arg0 to <type>* 5899 /// %rhs = bitcast void* %arg1 to <type>* 5900 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5901 /// store <type> %2, <type>* %lhs 5902 ///
ret void 5903 /// } 5904 /// \endcode 5905 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5906 SourceLocation Loc, 5907 ReductionCodeGen &RCG, unsigned N, 5908 const Expr *ReductionOp, 5909 const Expr *LHS, const Expr *RHS, 5910 const Expr *PrivateRef) { 5911 ASTContext &C = CGM.getContext(); 5912 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5913 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5914 FunctionArgList Args; 5915 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5916 C.VoidPtrTy, ImplicitParamDecl::Other); 5917 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5918 ImplicitParamDecl::Other); 5919 Args.emplace_back(&ParamInOut); 5920 Args.emplace_back(&ParamIn); 5921 const auto &FnInfo = 5922 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5923 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5924 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5925 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5926 Name, &CGM.getModule()); 5927 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5928 Fn->setDoesNotRecurse(); 5929 CodeGenFunction CGF(CGM); 5930 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5931 llvm::Value *Size = nullptr; 5932 // If the size of the reduction item is non-constant, load it from global 5933 // threadprivate variable. 5934 if (RCG.getSizes(N).second) { 5935 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5936 CGF, CGM.getContext().getSizeType(), 5937 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5938 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5939 CGM.getContext().getSizeType(), Loc); 5940 } 5941 RCG.emitAggregateType(CGF, N, Size); 5942 // Remap lhs and rhs variables to the addresses of the function arguments. 
5943 // %lhs = bitcast void* %arg0 to <type>* 5944 // %rhs = bitcast void* %arg1 to <type>* 5945 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5946 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5947 // Pull out the pointer to the variable. 5948 Address PtrAddr = CGF.EmitLoadOfPointer( 5949 CGF.GetAddrOfLocalVar(&ParamInOut), 5950 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5951 return CGF.Builder.CreateElementBitCast( 5952 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5953 }); 5954 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5955 // Pull out the pointer to the variable. 5956 Address PtrAddr = CGF.EmitLoadOfPointer( 5957 CGF.GetAddrOfLocalVar(&ParamIn), 5958 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5959 return CGF.Builder.CreateElementBitCast( 5960 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5961 }); 5962 PrivateScope.Privatize(); 5963 // Emit the combiner body: 5964 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5965 // store <type> %2, <type>* %lhs 5966 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5967 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5968 cast<DeclRefExpr>(RHS)); 5969 CGF.FinishFunction(); 5970 return Fn; 5971 } 5972 5973 /// Emits reduction finalizer function: 5974 /// \code 5975 /// void @.red_fini(void* %arg) { 5976 /// %0 = bitcast void* %arg to <type>* 5977 /// <destroy>(<type>* %0) 5978 /// ret void 5979 /// } 5980 /// \endcode 5981 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5982 SourceLocation Loc, 5983 ReductionCodeGen &RCG, unsigned N) { 5984 if (!RCG.needCleanups(N)) 5985 return nullptr; 5986 ASTContext &C = CGM.getContext(); 5987 FunctionArgList Args; 5988 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5989 ImplicitParamDecl::Other); 5990 Args.emplace_back(&Param); 5991 const auto &FnInfo = 5992 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5993 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5994 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5995 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5996 Name, &CGM.getModule()); 5997 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5998 Fn->setDoesNotRecurse(); 5999 CodeGenFunction CGF(CGM); 6000 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6001 Address PrivateAddr = CGF.EmitLoadOfPointer( 6002 CGF.GetAddrOfLocalVar(&Param), 6003 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6004 llvm::Value *Size = nullptr; 6005 // If the size of the reduction item is non-constant, load it from global 6006 // threadprivate variable. 6007 if (RCG.getSizes(N).second) { 6008 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6009 CGF, CGM.getContext().getSizeType(), 6010 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6011 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6012 CGM.getContext().getSizeType(), Loc); 6013 } 6014 RCG.emitAggregateType(CGF, N, Size); 6015 // Emit the finalizer body: 6016 // <destroy>(<type>* %0) 6017 RCG.emitCleanups(CGF, N, PrivateAddr); 6018 CGF.FinishFunction(Loc); 6019 return Fn; 6020 } 6021 6022 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6023 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6024 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6025 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6026 return nullptr; 6027 6028 // Build typedef struct: 6029 // kmp_taskred_input { 6030 // void *reduce_shar; // shared reduction item 6031 // void *reduce_orig; // original reduction item used for initialization 6032 // size_t reduce_size; // size of data item 6033 // void *reduce_init; // data initialization routine 6034 // void *reduce_fini; // data finalization routine 6035 // void *reduce_comb; // data combiner routine 6036 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6037 // } kmp_taskred_input_t; 6038 ASTContext &C = CGM.getContext(); 6039 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6040 RD->startDefinition(); 6041 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6042 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6043 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6044 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6045 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6046 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6047 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6048 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6049 RD->completeDefinition(); 6050 QualType RDType = C.getRecordType(RD); 6051 unsigned Size = Data.ReductionVars.size(); 6052 llvm::APInt ArraySize(/*numBits=*/64, Size); 6053 QualType ArrayRDType = C.getConstantArrayType( 6054 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6055 // kmp_taskred_input_t .rd_input.[Size]; 6056 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6057 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6058 Data.ReductionCopies, Data.ReductionOps); 6059 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6060 // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt]; 6061 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6062 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6063 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6064 TaskRedInput.getPointer(), Idxs, 6065 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6066 ".rd_input.gep."); 6067 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6068 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6069 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6070 RCG.emitSharedOrigLValue(CGF, Cnt); 6071 llvm::Value *CastedShared = 6072
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6073 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6074 // ElemLVal.reduce_orig = &Origs[Cnt]; 6075 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6076 llvm::Value *CastedOrig = 6077 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6078 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6079 RCG.emitAggregateType(CGF, Cnt); 6080 llvm::Value *SizeValInChars; 6081 llvm::Value *SizeVal; 6082 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6083 // We use delayed creation/initialization for VLAs and array sections. It is 6084 // required because runtime does not provide the way to pass the sizes of 6085 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6086 // threadprivate global variables are used to store these values and use 6087 // them in the functions. 6088 bool DelayedCreation = !!SizeVal; 6089 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6090 /*isSigned=*/false); 6091 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6092 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6093 // ElemLVal.reduce_init = init; 6094 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6095 llvm::Value *InitAddr = 6096 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6097 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6098 // ElemLVal.reduce_fini = fini; 6099 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6100 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6101 llvm::Value *FiniAddr = Fini 6102 ? 
CGF.EmitCastToVoidPtr(Fini) 6103 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6104 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6105 // ElemLVal.reduce_comb = comb; 6106 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6107 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6108 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6109 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6110 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6111 // ElemLVal.flags = DelayedCreation ? 1 : 0; 6112 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6113 if (DelayedCreation) { 6114 CGF.EmitStoreOfScalar( 6115 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6116 FlagsLVal); 6117 } else 6118 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6119 FlagsLVal.getType()); 6120 } 6121 if (Data.IsReductionWithTaskMod) { 6122 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6123 // is_ws, int num, void *data); 6124 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6125 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6126 CGM.IntTy, /*isSigned=*/true); 6127 llvm::Value *Args[] = { 6128 IdentTLoc, GTid, 6129 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0, 6130 /*isSigned=*/true), 6131 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6132 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6133 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6134 return CGF.EmitRuntimeCall( 6135 OMPBuilder.getOrCreateRuntimeFunction( 6136 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6137 Args); 6138 } 6139 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6140 llvm::Value *Args[] = { 6141 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6142 /*isSigned=*/true), 6143 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6144 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6145 CGM.VoidPtrTy)}; 6146 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6147 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6148 Args); 6149 } 6150 6151 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6152 SourceLocation Loc, 6153 bool IsWorksharingReduction) { 6154 // Build call __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 6155 // int is_ws); the call's result is discarded. 6156 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6157 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6158 CGM.IntTy, /*isSigned=*/true); 6159 llvm::Value *Args[] = {IdentTLoc, GTid, 6160 llvm::ConstantInt::get(CGM.IntTy, 6161 IsWorksharingReduction ? 1 : 0, 6162 /*isSigned=*/true)}; 6163 (void)CGF.EmitRuntimeCall( 6164 OMPBuilder.getOrCreateRuntimeFunction( 6165 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6166 Args); 6167 } 6168 6169 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6170 SourceLocation Loc, 6171 ReductionCodeGen &RCG, 6172 unsigned N) { 6173 auto Sizes = RCG.getSizes(N); 6174 // Emit threadprivate global variable if the type is non-constant 6175 // (Sizes.second != nullptr).
6176 if (Sizes.second) { 6177 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6178 /*isSigned=*/false); 6179 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6180 CGF, CGM.getContext().getSizeType(), 6181 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6182 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6183 } 6184 } 6185 6186 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6187 SourceLocation Loc, 6188 llvm::Value *ReductionsPtr, 6189 LValue SharedLVal) { 6190 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6191 // *d); 6192 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6193 CGM.IntTy, 6194 /*isSigned=*/true), 6195 ReductionsPtr, 6196 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6197 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6198 return Address( 6199 CGF.EmitRuntimeCall( 6200 OMPBuilder.getOrCreateRuntimeFunction( 6201 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6202 Args), 6203 SharedLVal.getAlignment()); 6204 } 6205 6206 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6207 SourceLocation Loc) { 6208 if (!CGF.HaveInsertPoint()) 6209 return; 6210 6211 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6212 OMPBuilder.createTaskwait(CGF.Builder); 6213 } else { 6214 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6215 // global_tid); 6216 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6217 // Ignore return result until untied tasks are supported. 
6218 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6219 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6220 Args); 6221 } 6222 6223 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6224 Region->emitUntiedSwitch(CGF); 6225 } 6226 6227 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6228 OpenMPDirectiveKind InnerKind, 6229 const RegionCodeGenTy &CodeGen, 6230 bool HasCancel) { 6231 if (!CGF.HaveInsertPoint()) 6232 return; 6233 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6234 InnerKind != OMPD_critical && 6235 InnerKind != OMPD_master); 6236 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6237 } 6238 6239 namespace { 6240 enum RTCancelKind { 6241 CancelNoreq = 0, 6242 CancelParallel = 1, 6243 CancelLoop = 2, 6244 CancelSections = 3, 6245 CancelTaskgroup = 4 6246 }; 6247 } // anonymous namespace 6248 6249 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6250 RTCancelKind CancelKind = CancelNoreq; 6251 if (CancelRegion == OMPD_parallel) 6252 CancelKind = CancelParallel; 6253 else if (CancelRegion == OMPD_for) 6254 CancelKind = CancelLoop; 6255 else if (CancelRegion == OMPD_sections) 6256 CancelKind = CancelSections; 6257 else { 6258 assert(CancelRegion == OMPD_taskgroup); 6259 CancelKind = CancelTaskgroup; 6260 } 6261 return CancelKind; 6262 } 6263 6264 void CGOpenMPRuntime::emitCancellationPointCall( 6265 CodeGenFunction &CGF, SourceLocation Loc, 6266 OpenMPDirectiveKind CancelRegion) { 6267 if (!CGF.HaveInsertPoint()) 6268 return; 6269 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6270 // global_tid, kmp_int32 cncl_kind); 6271 if (auto *OMPRegionInfo = 6272 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6273 // For 'cancellation point taskgroup', the task region info may not have a 6274 // cancel. This may instead happen in another adjacent task. 
6275 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6276 llvm::Value *Args[] = { 6277 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6278 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6279 // Keep the result: it is tested below to decide whether to exit the construct. 6280 llvm::Value *Result = CGF.EmitRuntimeCall( 6281 OMPBuilder.getOrCreateRuntimeFunction( 6282 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6283 Args); 6284 // if (__kmpc_cancellationpoint()) { 6285 // exit from construct; 6286 // } 6287 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6288 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6289 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6290 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6291 CGF.EmitBlock(ExitBB); 6292 // exit from construct; 6293 CodeGenFunction::JumpDest CancelDest = 6294 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6295 CGF.EmitBranchThroughCleanup(CancelDest); 6296 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6297 } 6298 } 6299 } 6300 6301 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6302 const Expr *IfCond, 6303 OpenMPDirectiveKind CancelRegion) { 6304 if (!CGF.HaveInsertPoint()) 6305 return; 6306 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6307 // kmp_int32 cncl_kind); 6308 auto &M = CGM.getModule(); 6309 if (auto *OMPRegionInfo = 6310 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6311 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6312 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6313 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6314 llvm::Value *Args[] = { 6315 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6316 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6317 // Keep the result: it is tested below to decide whether to exit the construct.
6318 llvm::Value *Result = CGF.EmitRuntimeCall( 6319 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6320 // if (__kmpc_cancel()) { 6321 // exit from construct; 6322 // } 6323 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6324 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6325 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6326 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6327 CGF.EmitBlock(ExitBB); 6328 // exit from construct; 6329 CodeGenFunction::JumpDest CancelDest = 6330 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6331 CGF.EmitBranchThroughCleanup(CancelDest); 6332 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6333 }; 6334 if (IfCond) { 6335 emitIfClause(CGF, IfCond, ThenGen, 6336 [](CodeGenFunction &, PrePostActionTy &) {}); 6337 } else { 6338 RegionCodeGenTy ThenRCG(ThenGen); 6339 ThenRCG(CGF); 6340 } 6341 } 6342 } 6343 6344 namespace { 6345 /// Cleanup action for uses_allocators support. 
6346 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6347 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6348 6349 public: 6350 OMPUsesAllocatorsActionTy( 6351 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6352 : Allocators(Allocators) {} 6353 void Enter(CodeGenFunction &CGF) override { 6354 if (!CGF.HaveInsertPoint()) 6355 return; 6356 for (const auto &AllocatorData : Allocators) { 6357 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6358 CGF, AllocatorData.first, AllocatorData.second); 6359 } 6360 } 6361 void Exit(CodeGenFunction &CGF) override { 6362 if (!CGF.HaveInsertPoint()) 6363 return; 6364 for (const auto &AllocatorData : Allocators) { 6365 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6366 AllocatorData.first); 6367 } 6368 } 6369 }; 6370 } // namespace 6371 6372 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6373 const OMPExecutableDirective &D, StringRef ParentName, 6374 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6375 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6376 assert(!ParentName.empty() && "Invalid target region parent name!"); 6377 HasEmittedTargetRegion = true; 6378 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6379 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6380 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6381 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6382 if (!D.AllocatorTraits) 6383 continue; 6384 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6385 } 6386 } 6387 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6388 CodeGen.setAction(UsesAllocatorAction); 6389 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6390 IsOffloadEntry, CodeGen); 6391 } 6392 6393 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6394 const Expr *Allocator, 6395 const Expr *AllocatorTraits) { 6396 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6397 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6398 // Use default memspace handle. 6399 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6400 llvm::Value *NumTraits = llvm::ConstantInt::get( 6401 CGF.IntTy, cast<ConstantArrayType>( 6402 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6403 ->getSize() 6404 .getLimitedValue()); 6405 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6406 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6407 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6408 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6409 AllocatorTraitsLVal.getBaseInfo(), 6410 AllocatorTraitsLVal.getTBAAInfo()); 6411 llvm::Value *Traits = 6412 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6413 6414 llvm::Value *AllocatorVal = 6415 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6416 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6417 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6418 // Store to allocator. 
6419 CGF.EmitVarDecl(*cast<VarDecl>( 6420 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6421 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6422 AllocatorVal = 6423 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6424 Allocator->getType(), Allocator->getExprLoc()); 6425 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6426 } 6427 6428 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6429 const Expr *Allocator) { 6430 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6431 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6432 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6433 llvm::Value *AllocatorVal = 6434 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6435 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6436 CGF.getContext().VoidPtrTy, 6437 Allocator->getExprLoc()); 6438 (void)CGF.EmitRuntimeCall( 6439 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6440 OMPRTL___kmpc_destroy_allocator), 6441 {ThreadId, AllocatorVal}); 6442 } 6443 6444 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6445 const OMPExecutableDirective &D, StringRef ParentName, 6446 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6447 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6448 // Create a unique name for the entry function using the source location 6449 // information of the current target region. The name will be something like: 6450 // 6451 // __omp_offloading_DD_FFFF_PP_lBB 6452 // 6453 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6454 // mangled name of the function that encloses the target region and BB is the 6455 // line number of the target region. 
6456 6457 unsigned DeviceID; 6458 unsigned FileID; 6459 unsigned Line; 6460 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6461 Line); 6462 SmallString<64> EntryFnName; 6463 { 6464 llvm::raw_svector_ostream OS(EntryFnName); 6465 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6466 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6467 } 6468 6469 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6470 6471 CodeGenFunction CGF(CGM, true); 6472 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6473 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6474 6475 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6476 6477 // If this target outline function is not an offload entry, we don't need to 6478 // register it. 6479 if (!IsOffloadEntry) 6480 return; 6481 6482 // The target region ID is used by the runtime library to identify the current 6483 // target region, so it only has to be unique and not necessarily point to 6484 // anything. It could be the pointer to the outlined function that implements 6485 // the target region, but we aren't using that so that the compiler doesn't 6486 // need to keep that, and could therefore inline the host function if proven 6487 // worthwhile during optimization. In the other hand, if emitting code for the 6488 // device, the ID has to be the function address so that it can retrieved from 6489 // the offloading entry and launched by the runtime library. We also mark the 6490 // outlined function to have external linkage in case we are emitting code for 6491 // the device, because these functions will be entry points to the device. 
6492 6493 if (CGM.getLangOpts().OpenMPIsDevice) { 6494 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6495 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6496 OutlinedFn->setDSOLocal(false); 6497 if (CGM.getTriple().isAMDGCN()) 6498 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6499 } else { 6500 std::string Name = getName({EntryFnName, "region_id"}); 6501 OutlinedFnID = new llvm::GlobalVariable( 6502 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6503 llvm::GlobalValue::WeakAnyLinkage, 6504 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6505 } 6506 6507 // Register the information for the entry associated with this target region. 6508 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6509 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6510 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6511 } 6512 6513 /// Checks if the expression is constant or does not have non-trivial function 6514 /// calls. 6515 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6516 // We can skip constant expressions. 6517 // We can skip expressions with trivial calls or simple expressions. 6518 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6519 !E->hasNonTrivialCall(Ctx)) && 6520 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6521 } 6522 6523 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6524 const Stmt *Body) { 6525 const Stmt *Child = Body->IgnoreContainers(); 6526 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6527 Child = nullptr; 6528 for (const Stmt *S : C->body()) { 6529 if (const auto *E = dyn_cast<Expr>(S)) { 6530 if (isTrivial(Ctx, E)) 6531 continue; 6532 } 6533 // Some of the statements can be ignored. 6534 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6535 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6536 continue; 6537 // Analyze declarations. 
6538 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6539 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6540 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6541 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6542 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6543 isa<UsingDirectiveDecl>(D) || 6544 isa<OMPDeclareReductionDecl>(D) || 6545 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6546 return true; 6547 const auto *VD = dyn_cast<VarDecl>(D); 6548 if (!VD) 6549 return false; 6550 return VD->isConstexpr() || 6551 ((VD->getType().isTrivialType(Ctx) || 6552 VD->getType()->isReferenceType()) && 6553 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6554 })) 6555 continue; 6556 } 6557 // Found multiple children - cannot get the one child only. 6558 if (Child) 6559 return nullptr; 6560 Child = S; 6561 } 6562 if (Child) 6563 Child = Child->IgnoreContainers(); 6564 } 6565 return Child; 6566 } 6567 6568 /// Emit the number of teams for a target directive. Inspect the num_teams 6569 /// clause associated with a teams construct combined or closely nested 6570 /// with the target directive. 6571 /// 6572 /// Emit a team of size one for directives such as 'target parallel' that 6573 /// have no associated teams construct. 6574 /// 6575 /// Otherwise, return nullptr. 
6576 static llvm::Value * 6577 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6578 const OMPExecutableDirective &D) { 6579 assert(!CGF.getLangOpts().OpenMPIsDevice && 6580 "Clauses associated with the teams directive expected to be emitted " 6581 "only for the host!"); 6582 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6583 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6584 "Expected target-based executable directive."); 6585 CGBuilderTy &Bld = CGF.Builder; 6586 switch (DirectiveKind) { 6587 case OMPD_target: { 6588 const auto *CS = D.getInnermostCapturedStmt(); 6589 const auto *Body = 6590 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6591 const Stmt *ChildStmt = 6592 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6593 if (const auto *NestedDir = 6594 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6595 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6596 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6597 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6598 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6599 const Expr *NumTeams = 6600 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6601 llvm::Value *NumTeamsVal = 6602 CGF.EmitScalarExpr(NumTeams, 6603 /*IgnoreResultAssign*/ true); 6604 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6605 /*isSigned=*/true); 6606 } 6607 return Bld.getInt32(0); 6608 } 6609 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6610 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6611 return Bld.getInt32(1); 6612 return Bld.getInt32(0); 6613 } 6614 return nullptr; 6615 } 6616 case OMPD_target_teams: 6617 case OMPD_target_teams_distribute: 6618 case OMPD_target_teams_distribute_simd: 6619 case OMPD_target_teams_distribute_parallel_for: 6620 case OMPD_target_teams_distribute_parallel_for_simd: { 6621 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6622 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6623 const Expr *NumTeams = 6624 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6625 llvm::Value *NumTeamsVal = 6626 CGF.EmitScalarExpr(NumTeams, 6627 /*IgnoreResultAssign*/ true); 6628 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6629 /*isSigned=*/true); 6630 } 6631 return Bld.getInt32(0); 6632 } 6633 case OMPD_target_parallel: 6634 case OMPD_target_parallel_for: 6635 case OMPD_target_parallel_for_simd: 6636 case OMPD_target_simd: 6637 return Bld.getInt32(1); 6638 case OMPD_parallel: 6639 case OMPD_for: 6640 case OMPD_parallel_for: 6641 case OMPD_parallel_master: 6642 case OMPD_parallel_sections: 6643 case OMPD_for_simd: 6644 case OMPD_parallel_for_simd: 6645 case OMPD_cancel: 6646 case OMPD_cancellation_point: 6647 case OMPD_ordered: 6648 case OMPD_threadprivate: 6649 case OMPD_allocate: 6650 case OMPD_task: 6651 case OMPD_simd: 6652 case OMPD_tile: 6653 case OMPD_sections: 6654 case OMPD_section: 6655 case OMPD_single: 6656 case OMPD_master: 6657 case OMPD_critical: 6658 case OMPD_taskyield: 6659 case OMPD_barrier: 6660 case OMPD_taskwait: 6661 case OMPD_taskgroup: 6662 case OMPD_atomic: 6663 case OMPD_flush: 6664 case OMPD_depobj: 6665 case OMPD_scan: 6666 case OMPD_teams: 6667 case OMPD_target_data: 6668 case OMPD_target_exit_data: 6669 case OMPD_target_enter_data: 6670 case OMPD_distribute: 6671 case OMPD_distribute_simd: 6672 case OMPD_distribute_parallel_for: 6673 case OMPD_distribute_parallel_for_simd: 6674 case OMPD_teams_distribute: 6675 case OMPD_teams_distribute_simd: 6676 case OMPD_teams_distribute_parallel_for: 6677 case OMPD_teams_distribute_parallel_for_simd: 6678 case OMPD_target_update: 6679 case OMPD_declare_simd: 6680 case OMPD_declare_variant: 6681 case OMPD_begin_declare_variant: 6682 case OMPD_end_declare_variant: 6683 case OMPD_declare_target: 6684 case OMPD_end_declare_target: 6685 case OMPD_declare_reduction: 6686 case OMPD_declare_mapper: 6687 case OMPD_taskloop: 6688 case OMPD_taskloop_simd: 6689 case 
OMPD_master_taskloop: 6690 case OMPD_master_taskloop_simd: 6691 case OMPD_parallel_master_taskloop: 6692 case OMPD_parallel_master_taskloop_simd: 6693 case OMPD_requires: 6694 case OMPD_unknown: 6695 break; 6696 default: 6697 break; 6698 } 6699 llvm_unreachable("Unexpected directive kind."); 6700 } 6701 6702 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6703 llvm::Value *DefaultThreadLimitVal) { 6704 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6705 CGF.getContext(), CS->getCapturedStmt()); 6706 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6707 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6708 llvm::Value *NumThreads = nullptr; 6709 llvm::Value *CondVal = nullptr; 6710 // Handle if clause. If if clause present, the number of threads is 6711 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6712 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6713 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6714 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6715 const OMPIfClause *IfClause = nullptr; 6716 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6717 if (C->getNameModifier() == OMPD_unknown || 6718 C->getNameModifier() == OMPD_parallel) { 6719 IfClause = C; 6720 break; 6721 } 6722 } 6723 if (IfClause) { 6724 const Expr *Cond = IfClause->getCondition(); 6725 bool Result; 6726 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6727 if (!Result) 6728 return CGF.Builder.getInt32(1); 6729 } else { 6730 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6731 if (const auto *PreInit = 6732 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6733 for (const auto *I : PreInit->decls()) { 6734 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6735 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6736 } else { 6737 CodeGenFunction::AutoVarEmission Emission = 6738 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6739 CGF.EmitAutoVarCleanups(Emission); 6740 
} 6741 } 6742 } 6743 CondVal = CGF.EvaluateExprAsBool(Cond); 6744 } 6745 } 6746 } 6747 // Check the value of num_threads clause iff if clause was not specified 6748 // or is not evaluated to false. 6749 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6750 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6751 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6752 const auto *NumThreadsClause = 6753 Dir->getSingleClause<OMPNumThreadsClause>(); 6754 CodeGenFunction::LexicalScope Scope( 6755 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6756 if (const auto *PreInit = 6757 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6758 for (const auto *I : PreInit->decls()) { 6759 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6760 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6761 } else { 6762 CodeGenFunction::AutoVarEmission Emission = 6763 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6764 CGF.EmitAutoVarCleanups(Emission); 6765 } 6766 } 6767 } 6768 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6769 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6770 /*isSigned=*/false); 6771 if (DefaultThreadLimitVal) 6772 NumThreads = CGF.Builder.CreateSelect( 6773 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6774 DefaultThreadLimitVal, NumThreads); 6775 } else { 6776 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6777 : CGF.Builder.getInt32(0); 6778 } 6779 // Process condition of the if clause. 6780 if (CondVal) { 6781 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6782 CGF.Builder.getInt32(1)); 6783 } 6784 return NumThreads; 6785 } 6786 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6787 return CGF.Builder.getInt32(1); 6788 return DefaultThreadLimitVal; 6789 } 6790 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6791 : CGF.Builder.getInt32(0); 6792 } 6793 6794 /// Emit the number of threads for a target directive. 
/// Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// All clause values emitted here are cast to an unsigned 32-bit integer, and
/// where both a thread limit and a num_threads value are present the unsigned
/// minimum of the two is selected.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': first try the directly nested directive with no thread
    // limit known yet (ThreadLimitVal is still null here).
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive, emitting its
      // pre-init declarations first.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not also a distribute directive: descend
      // one level and continue with its single nested directive (may be
      // null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: look inside it for the thread
      // count, now passing the thread limit found above (if any).
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread limit, or 0 when none was found.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if any, is on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Otherwise look one level deeper, but only through a plain 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // Whatever getNumThreads yields is returned directly, including null.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first if clause with no name modifier or the 'parallel'
      // modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds to a constant: false means exactly one thread,
          // true leaves CondVal null so no select is emitted below.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With a thread limit present take the unsigned minimum of the two;
      // otherwise the num_threads value is the result so far.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: 0.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A run-time if condition selects between the computed value and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds break out of the switch and reach the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
7056 OMP_MAP_PRIVATE = 0x80, 7057 /// Pass the element to the device by value. 7058 OMP_MAP_LITERAL = 0x100, 7059 /// Implicit map 7060 OMP_MAP_IMPLICIT = 0x200, 7061 /// Close is a hint to the runtime to allocate memory close to 7062 /// the target device. 7063 OMP_MAP_CLOSE = 0x400, 7064 /// 0x800 is reserved for compatibility with XLC. 7065 /// Produce a runtime error if the data is not already allocated. 7066 OMP_MAP_PRESENT = 0x1000, 7067 /// Signal that the runtime library should use args as an array of 7068 /// descriptor_dim pointers and use args_size as dims. Used when we have 7069 /// non-contiguous list items in target update directive 7070 OMP_MAP_NON_CONTIG = 0x100000000000, 7071 /// The 16 MSBs of the flags indicate whether the entry is member of some 7072 /// struct/class. 7073 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7074 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7075 }; 7076 7077 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7078 static unsigned getFlagMemberOffset() { 7079 unsigned Offset = 0; 7080 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7081 Remain = Remain >> 1) 7082 Offset++; 7083 return Offset; 7084 } 7085 7086 /// Class that holds debugging information for a data mapping to be passed to 7087 /// the runtime library. 7088 class MappingExprInfo { 7089 /// The variable declaration used for the data mapping. 7090 const ValueDecl *MapDecl = nullptr; 7091 /// The original expression used in the map clause, or null if there is 7092 /// none. 7093 const Expr *MapExpr = nullptr; 7094 7095 public: 7096 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7097 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7098 7099 const ValueDecl *getMapDecl() const { return MapDecl; } 7100 const Expr *getMapExpr() const { return MapExpr; } 7101 }; 7102 7103 /// Class that associates information with a base pointer to be passed to the 7104 /// runtime library. 
7105 class BasePointerInfo { 7106 /// The base pointer. 7107 llvm::Value *Ptr = nullptr; 7108 /// The base declaration that refers to this device pointer, or null if 7109 /// there is none. 7110 const ValueDecl *DevPtrDecl = nullptr; 7111 7112 public: 7113 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7114 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7115 llvm::Value *operator*() const { return Ptr; } 7116 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7117 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7118 }; 7119 7120 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7121 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7122 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7123 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7124 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7125 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7126 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7127 7128 /// This structure contains combined information generated for mappable 7129 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7130 /// mappers, and non-contiguous information. 7131 struct MapCombinedInfoTy { 7132 struct StructNonContiguousInfo { 7133 bool IsNonContiguous = false; 7134 MapDimArrayTy Dims; 7135 MapNonContiguousArrayTy Offsets; 7136 MapNonContiguousArrayTy Counts; 7137 MapNonContiguousArrayTy Strides; 7138 }; 7139 MapExprsArrayTy Exprs; 7140 MapBaseValuesArrayTy BasePointers; 7141 MapValuesArrayTy Pointers; 7142 MapValuesArrayTy Sizes; 7143 MapFlagsArrayTy Types; 7144 MapMappersArrayTy Mappers; 7145 StructNonContiguousInfo NonContigInfo; 7146 7147 /// Append arrays in \a CurInfo. 
7148 void append(MapCombinedInfoTy &CurInfo) { 7149 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7150 BasePointers.append(CurInfo.BasePointers.begin(), 7151 CurInfo.BasePointers.end()); 7152 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7153 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7154 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7155 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7156 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7157 CurInfo.NonContigInfo.Dims.end()); 7158 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7159 CurInfo.NonContigInfo.Offsets.end()); 7160 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7161 CurInfo.NonContigInfo.Counts.end()); 7162 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7163 CurInfo.NonContigInfo.Strides.end()); 7164 } 7165 }; 7166 7167 /// Map between a struct and the its lowest & highest elements which have been 7168 /// mapped. 7169 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7170 /// HE(FieldIndex, Pointer)} 7171 struct StructRangeInfoTy { 7172 MapCombinedInfoTy PreliminaryMapData; 7173 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7174 0, Address::invalid()}; 7175 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7176 0, Address::invalid()}; 7177 Address Base = Address::invalid(); 7178 Address LB = Address::invalid(); 7179 bool IsArraySection = false; 7180 bool HasCompleteRecord = false; 7181 }; 7182 7183 private: 7184 /// Kind that defines how a device pointer has to be returned. 
struct MapInfo {
  /// The expression components of the mapped list item.
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  /// The map type; OMPC_MAP_unknown until set by the constructor.
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  /// Map-type modifiers and motion modifiers attached to the item. Both are
  /// ArrayRef views and do not own their storage.
  // NOTE(review): presumably the referenced arrays live in the clause AST
  // nodes and outlive this object - confirm against the callers.
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  bool ReturnDevicePointer = false;
  bool IsImplicit = false;
  /// User-defined mapper associated with the item, or null if there is none.
  const ValueDecl *Mapper = nullptr;
  const Expr *VarRef = nullptr;
  bool ForDeviceAddr = false;

  MapInfo() = default;
  /// Member-wise constructor; every argument initializes the field of the
  /// same name.
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  const Expr *IE = nullptr;
  const ValueDecl *VD = nullptr;
  bool ForDeviceAddr = false;

  /// Member-wise constructor; every argument initializes the field of the
  /// same name.
  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                           bool ForDeviceAddr)
      : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either an executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
7231 CodeGenFunction &CGF; 7232 7233 /// Set of all first private variables in the current directive. 7234 /// bool data is set to true if the variable is implicitly marked as 7235 /// firstprivate, false otherwise. 7236 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7237 7238 /// Map between device pointer declarations and their expression components. 7239 /// The key value for declarations in 'this' is null. 7240 llvm::DenseMap< 7241 const ValueDecl *, 7242 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7243 DevPointersMap; 7244 7245 llvm::Value *getExprTypeSize(const Expr *E) const { 7246 QualType ExprTy = E->getType().getCanonicalType(); 7247 7248 // Calculate the size for array shaping expression. 7249 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7250 llvm::Value *Size = 7251 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7252 for (const Expr *SE : OAE->getDimensions()) { 7253 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7254 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7255 CGF.getContext().getSizeType(), 7256 SE->getExprLoc()); 7257 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7258 } 7259 return Size; 7260 } 7261 7262 // Reference types are ignored for mapping purposes. 7263 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7264 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7265 7266 // Given that an array section is considered a built-in type, we need to 7267 // do the calculation based on the length of the section instead of relying 7268 // on CGF.getTypeSize(E->getType()). 7269 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7270 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7271 OAE->getBase()->IgnoreParenImpCasts()) 7272 .getCanonicalType(); 7273 7274 // If there is no length associated with the expression and lower bound is 7275 // not specified too, that means we are using the whole length of the 7276 // base. 
7277 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7278 !OAE->getLowerBound()) 7279 return CGF.getTypeSize(BaseTy); 7280 7281 llvm::Value *ElemSize; 7282 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7283 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7284 } else { 7285 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7286 assert(ATy && "Expecting array type if not a pointer type."); 7287 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7288 } 7289 7290 // If we don't have a length at this point, that is because we have an 7291 // array section with a single element. 7292 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7293 return ElemSize; 7294 7295 if (const Expr *LenExpr = OAE->getLength()) { 7296 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7297 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7298 CGF.getContext().getSizeType(), 7299 LenExpr->getExprLoc()); 7300 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7301 } 7302 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7303 OAE->getLowerBound() && "expected array_section[lb:]."); 7304 // Size = sizetype - lb * elemtype; 7305 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7306 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7307 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7308 CGF.getContext().getSizeType(), 7309 OAE->getLowerBound()->getExprLoc()); 7310 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7311 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7312 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7313 LengthVal = CGF.Builder.CreateSelect( 7314 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7315 return LengthVal; 7316 } 7317 return CGF.getTypeSize(ExprTy); 7318 } 7319 7320 /// Return the corresponding bits for a given map clause modifier. 
Add 7321 /// a flag marking the map as a pointer if requested. Add a flag marking the 7322 /// map as the first one of a series of maps that relate to the same map 7323 /// expression. 7324 OpenMPOffloadMappingFlags getMapTypeBits( 7325 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7326 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7327 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7328 OpenMPOffloadMappingFlags Bits = 7329 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7330 switch (MapType) { 7331 case OMPC_MAP_alloc: 7332 case OMPC_MAP_release: 7333 // alloc and release is the default behavior in the runtime library, i.e. 7334 // if we don't pass any bits alloc/release that is what the runtime is 7335 // going to do. Therefore, we don't need to signal anything for these two 7336 // type modifiers. 7337 break; 7338 case OMPC_MAP_to: 7339 Bits |= OMP_MAP_TO; 7340 break; 7341 case OMPC_MAP_from: 7342 Bits |= OMP_MAP_FROM; 7343 break; 7344 case OMPC_MAP_tofrom: 7345 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7346 break; 7347 case OMPC_MAP_delete: 7348 Bits |= OMP_MAP_DELETE; 7349 break; 7350 case OMPC_MAP_unknown: 7351 llvm_unreachable("Unexpected map type!"); 7352 } 7353 if (AddPtrFlag) 7354 Bits |= OMP_MAP_PTR_AND_OBJ; 7355 if (AddIsTargetParamFlag) 7356 Bits |= OMP_MAP_TARGET_PARAM; 7357 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7358 != MapModifiers.end()) 7359 Bits |= OMP_MAP_ALWAYS; 7360 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7361 != MapModifiers.end()) 7362 Bits |= OMP_MAP_CLOSE; 7363 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7364 MapModifiers.end() || 7365 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7366 MotionModifiers.end()) 7367 Bits |= OMP_MAP_PRESENT; 7368 if (IsNonContiguous) 7369 Bits |= OMP_MAP_NON_CONTIG; 7370 return Bits; 7371 } 7372 7373 /// Return true if the provided expression is a final array section. 
A 7374 /// final array section, is one whose length can't be proved to be one. 7375 bool isFinalArraySectionExpression(const Expr *E) const { 7376 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7377 7378 // It is not an array section and therefore not a unity-size one. 7379 if (!OASE) 7380 return false; 7381 7382 // An array section with no colon always refer to a single element. 7383 if (OASE->getColonLocFirst().isInvalid()) 7384 return false; 7385 7386 const Expr *Length = OASE->getLength(); 7387 7388 // If we don't have a length we have to check if the array has size 1 7389 // for this dimension. Also, we should always expect a length if the 7390 // base type is pointer. 7391 if (!Length) { 7392 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7393 OASE->getBase()->IgnoreParenImpCasts()) 7394 .getCanonicalType(); 7395 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7396 return ATy->getSize().getSExtValue() != 1; 7397 // If we don't have a constant dimension length, we have to consider 7398 // the current section as having any size, so it is not necessarily 7399 // unitary. If it happen to be unity size, that's user fault. 7400 return true; 7401 } 7402 7403 // Check if the length evaluates to 1. 7404 Expr::EvalResult Result; 7405 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7406 return true; // Can have more that size 1. 7407 7408 llvm::APSInt ConstLength = Result.Val.getInt(); 7409 return ConstLength.getSExtValue() != 1; 7410 } 7411 7412 /// Generate the base pointers, section pointers, sizes, map type bits, and 7413 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7414 /// map type, map or motion modifiers, and expression components. 7415 /// \a IsFirstComponent should be set to true if the provided set of 7416 /// components is the first associated with a capture. 
7417 void generateInfoForComponentList( 7418 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7419 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7420 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7421 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7422 bool IsFirstComponentList, bool IsImplicit, 7423 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7424 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7425 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7426 OverlappedElements = llvm::None) const { 7427 // The following summarizes what has to be generated for each map and the 7428 // types below. The generated information is expressed in this order: 7429 // base pointer, section pointer, size, flags 7430 // (to add to the ones that come from the map type and modifier). 7431 // 7432 // double d; 7433 // int i[100]; 7434 // float *p; 7435 // 7436 // struct S1 { 7437 // int i; 7438 // float f[50]; 7439 // } 7440 // struct S2 { 7441 // int i; 7442 // float f[50]; 7443 // S1 s; 7444 // double *p; 7445 // struct S2 *ps; 7446 // int &ref; 7447 // } 7448 // S2 s; 7449 // S2 *ps; 7450 // 7451 // map(d) 7452 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7453 // 7454 // map(i) 7455 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7456 // 7457 // map(i[1:23]) 7458 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7459 // 7460 // map(p) 7461 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7462 // 7463 // map(p[1:24]) 7464 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7465 // in unified shared memory mode or for local pointers 7466 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7467 // 7468 // map(s) 7469 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7470 // 7471 // map(s.i) 7472 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7473 // 7474 // map(s.s.f) 7475 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7476 // 7477 // map(s.p) 7478 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7479 // 7480 // map(to: s.p[:22]) 7481 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7482 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7483 // &(s.p), &(s.p[0]), 22*sizeof(double), 7484 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7485 // (*) alloc space for struct members, only this is a target parameter 7486 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7487 // optimizes this entry out, same in the examples below) 7488 // (***) map the pointee (map: to) 7489 // 7490 // map(to: s.ref) 7491 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7492 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7493 // (*) alloc space for struct members, only this is a target parameter 7494 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7495 // optimizes this entry out, same in the examples below) 7496 // (***) map the pointee (map: to) 7497 // 7498 // map(s.ps) 7499 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7500 // 7501 // map(from: s.ps->s.i) 7502 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7503 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7504 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7505 // 7506 // map(to: s.ps->ps) 7507 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7508 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7509 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7510 // 7511 // map(s.ps->ps->ps) 7512 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7513 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7514 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7515 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7516 // 7517 // map(to: s.ps->ps->s.f[:22]) 7518 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7519 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7520 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7521 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7522 // 7523 // map(ps) 7524 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7525 // 7526 // map(ps->i) 7527 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7528 // 7529 // map(ps->s.f) 7530 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7531 // 7532 // map(from: ps->p) 7533 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7534 // 7535 // map(to: ps->p[:22]) 7536 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7537 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7538 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7539 // 7540 // map(ps->ps) 7541 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7542 // 7543 // map(from: ps->ps->s.i) 7544 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7545 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7546 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7547 // 7548 // map(from: ps->ps->ps) 7549 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7550 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7551 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7552 // 7553 // map(ps->ps->ps->ps) 7554 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7555 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7556 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7557 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7558 // 7559 // map(to: ps->ps->ps->s.f[:22]) 7560 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7561 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7562 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7563 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7564 // 7565 // map(to: s.f[:22]) map(from: s.p[:33]) 7566 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7567 // sizeof(double*) (**), TARGET_PARAM 7568 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7569 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7570 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7571 // (*) allocate contiguous space needed to fit all mapped members even if 7572 // we allocate space for members not mapped (in this example, 7573 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7574 // them as well because they fall between &s.f[0] and &s.p) 7575 // 7576 // map(from: s.f[:22]) map(to: ps->p[:33]) 7577 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7578 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7579 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7580 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7581 // (*) the struct this entry pertains to is the 2nd element in the list of 7582 // arguments, hence MEMBER_OF(2) 7583 // 7584 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7585 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7586 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7587 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7588 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7589 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7590 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7591 // (*) the struct this entry pertains to is the 4th element in the list 7592 // of arguments, hence MEMBER_OF(4) 7593 7594 // Track if the map information being generated is the first for a capture. 7595 bool IsCaptureFirstInfo = IsFirstComponentList; 7596 // When the variable is on a declare target link or in a to clause with 7597 // unified memory, a reference is needed to hold the host/device address 7598 // of the variable. 7599 bool RequiresReference = false; 7600 7601 // Scan the components from the base to the complete expression. 
7602 auto CI = Components.rbegin(); 7603 auto CE = Components.rend(); 7604 auto I = CI; 7605 7606 // Track if the map information being generated is the first for a list of 7607 // components. 7608 bool IsExpressionFirstInfo = true; 7609 bool FirstPointerInComplexData = false; 7610 Address BP = Address::invalid(); 7611 const Expr *AssocExpr = I->getAssociatedExpression(); 7612 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7613 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7614 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7615 7616 if (isa<MemberExpr>(AssocExpr)) { 7617 // The base is the 'this' pointer. The content of the pointer is going 7618 // to be the base of the field being mapped. 7619 BP = CGF.LoadCXXThisAddress(); 7620 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7621 (OASE && 7622 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7623 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7624 } else if (OAShE && 7625 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7626 BP = Address( 7627 CGF.EmitScalarExpr(OAShE->getBase()), 7628 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7629 } else { 7630 // The base is the reference to the variable. 7631 // BP = &Var. 7632 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7633 if (const auto *VD = 7634 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7635 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7636 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7637 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7638 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7639 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7640 RequiresReference = true; 7641 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7642 } 7643 } 7644 } 7645 7646 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7647 // the last component), the base has to be the pointer itself, not its 7648 // reference. References are ignored for mapping purposes. 7649 QualType Ty = 7650 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7651 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7652 // No need to generate individual map information for the pointer, it 7653 // can be associated with the combined storage if shared memory mode is 7654 // active or the base declaration is not global variable. 7655 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7656 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7657 !VD || VD->hasLocalStorage()) 7658 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7659 else 7660 FirstPointerInComplexData = true; 7661 ++I; 7662 } 7663 } 7664 7665 // Track whether a component of the list should be marked as MEMBER_OF some 7666 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7667 // in a component list should be marked as MEMBER_OF, all subsequent entries 7668 // do not belong to the base struct. E.g. 7669 // struct S2 s; 7670 // s.ps->ps->ps->f[:] 7671 // (1) (2) (3) (4) 7672 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7673 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7674 // is the pointee of ps(2) which is not member of struct s, so it should not 7675 // be marked as such (it is still PTR_AND_OBJ). 7676 // The variable is initialized to false so that PTR_AND_OBJ entries which 7677 // are not struct members are not considered (e.g. array of pointers to 7678 // data). 7679 bool ShouldBeMemberOf = false; 7680 7681 // Variable keeping track of whether or not we have encountered a component 7682 // in the component list which is a member expression. 
Useful when we have a 7683 // pointer or a final array section, in which case it is the previous 7684 // component in the list which tells us whether we have a member expression. 7685 // E.g. X.f[:] 7686 // While processing the final array section "[:]" it is "f" which tells us 7687 // whether we are dealing with a member of a declared struct. 7688 const MemberExpr *EncounteredME = nullptr; 7689 7690 // Track for the total number of dimension. Start from one for the dummy 7691 // dimension. 7692 uint64_t DimSize = 1; 7693 7694 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7695 bool IsPrevMemberReference = false; 7696 7697 for (; I != CE; ++I) { 7698 // If the current component is member of a struct (parent struct) mark it. 7699 if (!EncounteredME) { 7700 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7701 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7702 // as MEMBER_OF the parent struct. 7703 if (EncounteredME) { 7704 ShouldBeMemberOf = true; 7705 // Do not emit as complex pointer if this is actually not array-like 7706 // expression. 7707 if (FirstPointerInComplexData) { 7708 QualType Ty = std::prev(I) 7709 ->getAssociatedDeclaration() 7710 ->getType() 7711 .getNonReferenceType(); 7712 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7713 FirstPointerInComplexData = false; 7714 } 7715 } 7716 } 7717 7718 auto Next = std::next(I); 7719 7720 // We need to generate the addresses and sizes if this is the last 7721 // component, if the component is a pointer or if it is an array section 7722 // whose length can't be proved to be one. If this is a pointer, it 7723 // becomes the base address for the following components. 7724 7725 // A final array section, is one whose length can't be proved to be one. 7726 // If the map item is non-contiguous then we don't treat any array section 7727 // as final array section. 
7728 bool IsFinalArraySection = 7729 !IsNonContiguous && 7730 isFinalArraySectionExpression(I->getAssociatedExpression()); 7731 7732 // If we have a declaration for the mapping use that, otherwise use 7733 // the base declaration of the map clause. 7734 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7735 ? I->getAssociatedDeclaration() 7736 : BaseDecl; 7737 7738 // Get information on whether the element is a pointer. Have to do a 7739 // special treatment for array sections given that they are built-in 7740 // types. 7741 const auto *OASE = 7742 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7743 const auto *OAShE = 7744 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7745 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7746 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7747 bool IsPointer = 7748 OAShE || 7749 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7750 .getCanonicalType() 7751 ->isAnyPointerType()) || 7752 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7753 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7754 MapDecl && 7755 MapDecl->getType()->isLValueReferenceType(); 7756 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7757 7758 if (OASE) 7759 ++DimSize; 7760 7761 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7762 IsFinalArraySection) { 7763 // If this is not the last component, we expect the pointer to be 7764 // associated with an array expression or member expression. 
7765 assert((Next == CE || 7766 isa<MemberExpr>(Next->getAssociatedExpression()) || 7767 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7768 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7769 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7770 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7771 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7772 "Unexpected expression"); 7773 7774 Address LB = Address::invalid(); 7775 Address LowestElem = Address::invalid(); 7776 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7777 const MemberExpr *E) { 7778 const Expr *BaseExpr = E->getBase(); 7779 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7780 // scalar. 7781 LValue BaseLV; 7782 if (E->isArrow()) { 7783 LValueBaseInfo BaseInfo; 7784 TBAAAccessInfo TBAAInfo; 7785 Address Addr = 7786 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7787 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7788 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7789 } else { 7790 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7791 } 7792 return BaseLV; 7793 }; 7794 if (OAShE) { 7795 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7796 CGF.getContext().getTypeAlignInChars( 7797 OAShE->getBase()->getType())); 7798 } else if (IsMemberReference) { 7799 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7800 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7801 LowestElem = CGF.EmitLValueForFieldInitialization( 7802 BaseLVal, cast<FieldDecl>(MapDecl)) 7803 .getAddress(CGF); 7804 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7805 .getAddress(CGF); 7806 } else { 7807 LowestElem = LB = 7808 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7809 .getAddress(CGF); 7810 } 7811 7812 // If this component is a pointer inside the base struct then we don't 7813 // need to create any entry for it - it will be combined with the object 7814 // it is 
pointing to into a single PTR_AND_OBJ entry. 7815 bool IsMemberPointerOrAddr = 7816 EncounteredME && 7817 (((IsPointer || ForDeviceAddr) && 7818 I->getAssociatedExpression() == EncounteredME) || 7819 (IsPrevMemberReference && !IsPointer) || 7820 (IsMemberReference && Next != CE && 7821 !Next->getAssociatedExpression()->getType()->isPointerType())); 7822 if (!OverlappedElements.empty() && Next == CE) { 7823 // Handle base element with the info for overlapped elements. 7824 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7825 assert(!IsPointer && 7826 "Unexpected base element with the pointer type."); 7827 // Mark the whole struct as the struct that requires allocation on the 7828 // device. 7829 PartialStruct.LowestElem = {0, LowestElem}; 7830 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7831 I->getAssociatedExpression()->getType()); 7832 Address HB = CGF.Builder.CreateConstGEP( 7833 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7834 CGF.VoidPtrTy), 7835 TypeSize.getQuantity() - 1); 7836 PartialStruct.HighestElem = { 7837 std::numeric_limits<decltype( 7838 PartialStruct.HighestElem.first)>::max(), 7839 HB}; 7840 PartialStruct.Base = BP; 7841 PartialStruct.LB = LB; 7842 assert( 7843 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7844 "Overlapped elements must be used only once for the variable."); 7845 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7846 // Emit data for non-overlapped data. 7847 OpenMPOffloadMappingFlags Flags = 7848 OMP_MAP_MEMBER_OF | 7849 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7850 /*AddPtrFlag=*/false, 7851 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7852 llvm::Value *Size = nullptr; 7853 // Do bitcopy of all non-overlapped structure elements. 
7854 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7855 Component : OverlappedElements) { 7856 Address ComponentLB = Address::invalid(); 7857 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7858 Component) { 7859 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7860 const auto *FD = dyn_cast<FieldDecl>(VD); 7861 if (FD && FD->getType()->isLValueReferenceType()) { 7862 const auto *ME = 7863 cast<MemberExpr>(MC.getAssociatedExpression()); 7864 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7865 ComponentLB = 7866 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7867 .getAddress(CGF); 7868 } else { 7869 ComponentLB = 7870 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7871 .getAddress(CGF); 7872 } 7873 Size = CGF.Builder.CreatePtrDiff( 7874 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7875 CGF.EmitCastToVoidPtr(LB.getPointer())); 7876 break; 7877 } 7878 } 7879 assert(Size && "Failed to determine structure size"); 7880 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7881 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7882 CombinedInfo.Pointers.push_back(LB.getPointer()); 7883 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7884 Size, CGF.Int64Ty, /*isSigned=*/true)); 7885 CombinedInfo.Types.push_back(Flags); 7886 CombinedInfo.Mappers.push_back(nullptr); 7887 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7888 : 1); 7889 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7890 } 7891 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7892 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7893 CombinedInfo.Pointers.push_back(LB.getPointer()); 7894 Size = CGF.Builder.CreatePtrDiff( 7895 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7896 CGF.EmitCastToVoidPtr(LB.getPointer())); 7897 CombinedInfo.Sizes.push_back( 7898 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7899 CombinedInfo.Types.push_back(Flags); 7900 CombinedInfo.Mappers.push_back(nullptr); 7901 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7902 : 1); 7903 break; 7904 } 7905 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7906 if (!IsMemberPointerOrAddr || 7907 (Next == CE && MapType != OMPC_MAP_unknown)) { 7908 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7909 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7910 CombinedInfo.Pointers.push_back(LB.getPointer()); 7911 CombinedInfo.Sizes.push_back( 7912 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7913 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7914 : 1); 7915 7916 // If Mapper is valid, the last component inherits the mapper. 7917 bool HasMapper = Mapper && Next == CE; 7918 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7919 7920 // We need to add a pointer flag for each map that comes from the 7921 // same expression except for the first one. We also need to signal 7922 // this map is the first one that relates with the current capture 7923 // (there is a set of entries for each capture). 
7924 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7925 MapType, MapModifiers, MotionModifiers, IsImplicit, 7926 !IsExpressionFirstInfo || RequiresReference || 7927 FirstPointerInComplexData || IsMemberReference, 7928 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7929 7930 if (!IsExpressionFirstInfo || IsMemberReference) { 7931 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7932 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7933 if (IsPointer || (IsMemberReference && Next != CE)) 7934 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7935 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7936 7937 if (ShouldBeMemberOf) { 7938 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7939 // should be later updated with the correct value of MEMBER_OF. 7940 Flags |= OMP_MAP_MEMBER_OF; 7941 // From now on, all subsequent PTR_AND_OBJ entries should not be 7942 // marked as MEMBER_OF. 7943 ShouldBeMemberOf = false; 7944 } 7945 } 7946 7947 CombinedInfo.Types.push_back(Flags); 7948 } 7949 7950 // If we have encountered a member expression so far, keep track of the 7951 // mapped member. If the parent is "*this", then the value declaration 7952 // is nullptr. 
7953 if (EncounteredME) { 7954 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7955 unsigned FieldIndex = FD->getFieldIndex(); 7956 7957 // Update info about the lowest and highest elements for this struct 7958 if (!PartialStruct.Base.isValid()) { 7959 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7960 if (IsFinalArraySection) { 7961 Address HB = 7962 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7963 .getAddress(CGF); 7964 PartialStruct.HighestElem = {FieldIndex, HB}; 7965 } else { 7966 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7967 } 7968 PartialStruct.Base = BP; 7969 PartialStruct.LB = BP; 7970 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7971 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7972 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7973 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7974 } 7975 } 7976 7977 // Need to emit combined struct for array sections. 7978 if (IsFinalArraySection || IsNonContiguous) 7979 PartialStruct.IsArraySection = true; 7980 7981 // If we have a final array section, we are done with this expression. 7982 if (IsFinalArraySection) 7983 break; 7984 7985 // The pointer becomes the base for the next element. 7986 if (Next != CE) 7987 BP = IsMemberReference ? LowestElem : LB; 7988 7989 IsExpressionFirstInfo = false; 7990 IsCaptureFirstInfo = false; 7991 FirstPointerInComplexData = false; 7992 IsPrevMemberReference = IsMemberReference; 7993 } else if (FirstPointerInComplexData) { 7994 QualType Ty = Components.rbegin() 7995 ->getAssociatedDeclaration() 7996 ->getType() 7997 .getNonReferenceType(); 7998 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7999 FirstPointerInComplexData = false; 8000 } 8001 } 8002 // If ran into the whole component - allocate the space for the whole 8003 // record. 
8004 if (!EncounteredME) 8005 PartialStruct.HasCompleteRecord = true; 8006 8007 if (!IsNonContiguous) 8008 return; 8009 8010 const ASTContext &Context = CGF.getContext(); 8011 8012 // For supporting stride in array section, we need to initialize the first 8013 // dimension size as 1, first offset as 0, and first count as 1 8014 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8015 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8016 MapValuesArrayTy CurStrides; 8017 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8018 uint64_t ElementTypeSize; 8019 8020 // Collect Size information for each dimension and get the element size as 8021 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8022 // should be [10, 10] and the first stride is 4 btyes. 8023 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8024 Components) { 8025 const Expr *AssocExpr = Component.getAssociatedExpression(); 8026 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8027 8028 if (!OASE) 8029 continue; 8030 8031 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8032 auto *CAT = Context.getAsConstantArrayType(Ty); 8033 auto *VAT = Context.getAsVariableArrayType(Ty); 8034 8035 // We need all the dimension size except for the last dimension. 8036 assert((VAT || CAT || &Component == &*Components.begin()) && 8037 "Should be either ConstantArray or VariableArray if not the " 8038 "first Component"); 8039 8040 // Get element size if CurStrides is empty. 
8041 if (CurStrides.empty()) { 8042 const Type *ElementType = nullptr; 8043 if (CAT) 8044 ElementType = CAT->getElementType().getTypePtr(); 8045 else if (VAT) 8046 ElementType = VAT->getElementType().getTypePtr(); 8047 else 8048 assert(&Component == &*Components.begin() && 8049 "Only expect pointer (non CAT or VAT) when this is the " 8050 "first Component"); 8051 // If ElementType is null, then it means the base is a pointer 8052 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8053 // for next iteration. 8054 if (ElementType) { 8055 // For the case that having pointer as base, we need to remove one 8056 // level of indirection. 8057 if (&Component != &*Components.begin()) 8058 ElementType = ElementType->getPointeeOrArrayElementType(); 8059 ElementTypeSize = 8060 Context.getTypeSizeInChars(ElementType).getQuantity(); 8061 CurStrides.push_back( 8062 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8063 } 8064 } 8065 // Get dimension value except for the last dimension since we don't need 8066 // it. 8067 if (DimSizes.size() < Components.size() - 1) { 8068 if (CAT) 8069 DimSizes.push_back(llvm::ConstantInt::get( 8070 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8071 else if (VAT) 8072 DimSizes.push_back(CGF.Builder.CreateIntCast( 8073 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8074 /*IsSigned=*/false)); 8075 } 8076 } 8077 8078 // Skip the dummy dimension since we have already have its information. 8079 auto DI = DimSizes.begin() + 1; 8080 // Product of dimension. 8081 llvm::Value *DimProd = 8082 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8083 8084 // Collect info for non-contiguous. Notice that offset, count, and stride 8085 // are only meaningful for array-section, so we insert a null for anything 8086 // other than array-section. 8087 // Also, the size of offset, count, and stride are not the same as 8088 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8089 // count, and stride are the same as the number of non-contiguous 8090 // declaration in target update to/from clause. 8091 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8092 Components) { 8093 const Expr *AssocExpr = Component.getAssociatedExpression(); 8094 8095 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8096 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8097 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8098 /*isSigned=*/false); 8099 CurOffsets.push_back(Offset); 8100 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8101 CurStrides.push_back(CurStrides.back()); 8102 continue; 8103 } 8104 8105 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8106 8107 if (!OASE) 8108 continue; 8109 8110 // Offset 8111 const Expr *OffsetExpr = OASE->getLowerBound(); 8112 llvm::Value *Offset = nullptr; 8113 if (!OffsetExpr) { 8114 // If offset is absent, then we just set it to zero. 8115 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8116 } else { 8117 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8118 CGF.Int64Ty, 8119 /*isSigned=*/false); 8120 } 8121 CurOffsets.push_back(Offset); 8122 8123 // Count 8124 const Expr *CountExpr = OASE->getLength(); 8125 llvm::Value *Count = nullptr; 8126 if (!CountExpr) { 8127 // In Clang, once a high dimension is an array section, we construct all 8128 // the lower dimension as array section, however, for case like 8129 // arr[0:2][2], Clang construct the inner dimension as an array section 8130 // but it actually is not in an array section form according to spec. 8131 if (!OASE->getColonLocFirst().isValid() && 8132 !OASE->getColonLocSecond().isValid()) { 8133 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8134 } else { 8135 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8136 // When the length is absent it defaults to ⌈(size − 8137 // lower-bound)/stride⌉, where size is the size of the array 8138 // dimension. 8139 const Expr *StrideExpr = OASE->getStride(); 8140 llvm::Value *Stride = 8141 StrideExpr 8142 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8143 CGF.Int64Ty, /*isSigned=*/false) 8144 : nullptr; 8145 if (Stride) 8146 Count = CGF.Builder.CreateUDiv( 8147 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8148 else 8149 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8150 } 8151 } else { 8152 Count = CGF.EmitScalarExpr(CountExpr); 8153 } 8154 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8155 CurCounts.push_back(Count); 8156 8157 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8158 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8159 // Offset Count Stride 8160 // D0 0 1 4 (int) <- dummy dimension 8161 // D1 0 2 8 (2 * (1) * 4) 8162 // D2 1 2 20 (1 * (1 * 5) * 4) 8163 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8164 const Expr *StrideExpr = OASE->getStride(); 8165 llvm::Value *Stride = 8166 StrideExpr 8167 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8168 CGF.Int64Ty, /*isSigned=*/false) 8169 : nullptr; 8170 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8171 if (Stride) 8172 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8173 else 8174 CurStrides.push_back(DimProd); 8175 if (DI != DimSizes.end()) 8176 ++DI; 8177 } 8178 8179 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8180 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8181 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8182 } 8183 8184 /// Return the adjusted map modifiers if the declaration a capture refers to 8185 /// appears in a first-private clause. This is expected to be used only with 8186 /// directives that start with 'target'. 
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant-typed variable captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointer-typed variable: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    // Any other first-private variable: PRIVATE | TO.
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  // Not recorded as first-private in this handler: TO | FROM.
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

/// Encode the zero-based entry index \a Position as a MEMBER_OF field value.
/// The stored value is Position + 1, placed at bit offset
/// getFlagMemberOffset().
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift (Position + 1) left by getFlagMemberOffset() bits.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the MEMBER_OF field of \a Flags with \a MemberOfFlag. PTR_AND_OBJ
/// entries are only updated when they carry the all-ones MEMBER_OF
/// placeholder; otherwise \a Flags is left unchanged.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
      ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \a Layout the fields of \a RD, ordered by their LLVM struct
/// field index, recursing into non-empty base classes. Bit-fields and
/// zero-size fields are not appended.
/// \param AsBase Use the base-subobject LLVM type of \a RD instead of its
/// complete LLVM type to size the layout table.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; a slot holds either a base class or a
  // field, and stays null if nothing is recorded for that element.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Keep whatever was already recorded for this slot.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
    // will fill in later.)
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Flatten: walk the slots in index order, expanding bases recursively and
  // appending plain fields directly.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates with a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
/// \param Clauses Clauses to scan; only map, to, from, use_device_ptr and
/// use_device_addr clauses are considered here.
/// \param SkipVarSet Declarations for which no map information is recorded.
void generateAllInfoForClauses(
    ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  // Each declaration owns `Total` buckets indexed by MapKind; the buckets are
  // later visited in enumerator order (Present, Allocs, Other).
  enum MapKind { Present, Allocs, Other, Total };
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<SmallVector<MapInfo, 8>, 4>>
      Info;

  // Helper function to fill the information map for the different supported
  // clauses.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D, MapKind Kind,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
        if (SkipVarSet.contains(D))
          return;
        // Create the per-declaration bucket array on first use.
        auto It = Info.find(D);
        if (It == Info.end())
          It = Info
                   .insert(std::make_pair(
                       D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                   .first;
        It->second[Kind].emplace_back(
            L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
            IsImplicit, Mapper, VarRef, ForDeviceAddr);
      };

  // Record the component lists of 'map' clauses.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPMapClause>(Cl);
    if (!C)
      continue;
    // 'present' modifier takes precedence over the 'alloc' map type when
    // choosing the bucket.
    MapKind Kind = Other;
    if (!C->getMapTypeModifiers().empty() &&
        llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
          return K == OMPC_MAP_MODIFIER_present;
        }))
      Kind = Present;
    else if (C->getMapType() == OMPC_MAP_alloc)
      Kind = Allocs;
    // EI walks the clause's variable references in step with the component
    // lists.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      // The variable reference is only passed on when the clause has a valid
      // map location.
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              E);
      ++EI;
    }
  }
  // Record the component lists of 'to' clauses as OMPC_MAP_to.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPToClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (!C->getMotionModifiers().empty() &&
        llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
          return K == OMPC_MOTION_MODIFIER_present;
        }))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }
  // Record the component lists of 'from' clauses as OMPC_MAP_from.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPFromClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (!C->getMotionModifiers().empty() &&
        llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
          return K == OMPC_MOTION_MODIFIER_present;
        }))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  // Entries for non-member use_device_ptr items with no matching map; these
  // are appended to CombinedInfo at the very end of this function.
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            // PrevCI is the component that follows the base in the matched
            // list, or rend() if the list consists of the base only.
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
        }
        if (Found)
          continue;
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // Base pointer and pointer are both the loaded pointer value; the
        // size is zero and the only flag is RETURN_PARAM.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.Exprs.push_back(VD);
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  // Declarations already handled by this loop; repeated list items are
  // skipped.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != Data.end()) {
            CI->ReturnDevicePointer = true;
            Found = true;
            break;
          }
        }
        if (Found)
          continue;
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Unlike use_device_ptr, the entry uses the address of the item (or
        // its scalar value when it is not a glvalue) and is pushed straight
        // into CombinedInfo.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Emit the entries for every declaration collected in Info, one
  // declaration at a time.
  for (const auto &Data : Info) {
    StructRangeInfoTy PartialStruct;
    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    const Decl *D = Data.first;
    // Null for the 'this'-based entries, which are keyed by nullptr.
    const ValueDecl *VD = cast_or_null<ValueDecl>(D);
    for (const auto &M : Data.second) {
      for (const MapInfo &L : M) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
            CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
            L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The flag goes on the first entry generated for this list.
          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
      }
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(Data.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          // Base pointer is the address of the member; pointer is the value
          // loaded from it.
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
          // placeholder value MEMBER_OF=FFFF so that the entry is later
          // updated with the correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }
    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    // The combined entry is pushed onto CombinedInfo before CurInfo is
    // appended below, so it precedes the entries of its members.
    if (PartialStruct.Base.isValid()) {
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already
    // have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

public:
/// Constructor for an executable directive. Records the declarations named
/// in firstprivate clauses, the allocators / allocator traits of
/// uses_allocators clauses (as implicit first-privates), and the component
/// lists of is_device_ptr clauses.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the traits variable when it is a plain declaration reference;
      // otherwise fall back to the allocator itself if it names a variable.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
8678 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8679 MapFlagsArrayTy &CurTypes, 8680 const StructRangeInfoTy &PartialStruct, 8681 const ValueDecl *VD = nullptr, 8682 bool NotTargetParams = true) const { 8683 if (CurTypes.size() == 1 && 8684 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8685 !PartialStruct.IsArraySection) 8686 return; 8687 Address LBAddr = PartialStruct.LowestElem.second; 8688 Address HBAddr = PartialStruct.HighestElem.second; 8689 if (PartialStruct.HasCompleteRecord) { 8690 LBAddr = PartialStruct.LB; 8691 HBAddr = PartialStruct.LB; 8692 } 8693 CombinedInfo.Exprs.push_back(VD); 8694 // Base is the base of the struct 8695 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8696 // Pointer is the address of the lowest element 8697 llvm::Value *LB = LBAddr.getPointer(); 8698 CombinedInfo.Pointers.push_back(LB); 8699 // There should not be a mapper for a combined entry. 8700 CombinedInfo.Mappers.push_back(nullptr); 8701 // Size is (addr of {highest+1} element) - (addr of lowest element) 8702 llvm::Value *HB = HBAddr.getPointer(); 8703 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8704 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8705 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8706 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8707 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8708 /*isSigned=*/false); 8709 CombinedInfo.Sizes.push_back(Size); 8710 // Map type is always TARGET_PARAM, if generate info for captures. 8711 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8712 : OMP_MAP_TARGET_PARAM); 8713 // If any element has the present modifier, then make sure the runtime 8714 // doesn't attempt to allocate the struct. 
8715 if (CurTypes.end() != 8716 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8717 return Type & OMP_MAP_PRESENT; 8718 })) 8719 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8720 // Remove TARGET_PARAM flag from the first element 8721 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8722 8723 // All other current entries will be MEMBER_OF the combined entry 8724 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8725 // 0xFFFF in the MEMBER_OF field). 8726 OpenMPOffloadMappingFlags MemberOfFlag = 8727 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8728 for (auto &M : CurTypes) 8729 setCorrectMemberOfFlag(M, MemberOfFlag); 8730 } 8731 8732 /// Generate all the base pointers, section pointers, sizes, map types, and 8733 /// mappers for the extracted mappable expressions (all included in \a 8734 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8735 /// pair of the relevant declaration and index where it occurs is appended to 8736 /// the device pointers info array. 8737 void generateAllInfo( 8738 MapCombinedInfoTy &CombinedInfo, 8739 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8740 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8741 assert(CurDir.is<const OMPExecutableDirective *>() && 8742 "Expect a executable directive"); 8743 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8744 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8745 } 8746 8747 /// Generate all the base pointers, section pointers, sizes, map types, and 8748 /// mappers for the extracted map clauses of user-defined mapper (all included 8749 /// in \a CombinedInfo). 
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  // This entry point is only valid when the handler was constructed for a
  // declare mapper directive.
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
}

/// Emit capture info for lambdas: the captured 'this' (if any), variables
/// captured by reference, and pointer variables captured by copy.
///
/// Does nothing if \a VD is not of lambda type. Each emitted entry is
/// appended to \a CombinedInfo with a placeholder MEMBER_OF flag, and the
/// mapping "field address -> lambda object address" is recorded in
/// \a LambdaPointers so that adjustMemberOfForLambdaCaptures can later set
/// the real MEMBER_OF index.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Entry for the captured 'this': the base is the address of the capture
    // field and the size is that of a pointer.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and by-copy captures of pointers get an
    // entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the entry covers the referenced object, with
      // the size of its non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // By-copy capture of a pointer: the loaded pointer value is used as the
      // pointer of the entry, with a zero size.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambda captures.
///
/// For every entry whose type is exactly the placeholder flag combination
/// pushed by generateInfoForLambdaCaptures, look up the owning lambda object
/// in \a LambdaPointers, find the closest preceding entry in \a Pointers that
/// maps that object, and rewrite the MEMBER_OF field to refer to it.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Search backwards from I - 1 down to 0; J is kept one above the probed
    // index so the unsigned counter never wraps.
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
///
/// \param Cap The capture to generate map information for; must not be a
/// variable array type capture.
/// \param Arg The value passed for the capture.
/// \param PartialStruct Forwarded to generateInfoForComponentList for every
/// component list of the capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we are generating information for the first
  // component. A null VD stands for the captured 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // Tuple layout: <component list, map type, map type modifiers, is implicit,
  // mapper declaration, variable reference expression>.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Collect every component list of every map clause that refers to VD.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    // EI is advanced once per component list found for VD.
    // NOTE(review): this assumes the clause's variable references line up
    // one-to-one with the lists returned by decl_component_lists(VD) -
    // confirm.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The Expression is not correct if the mapping is implicit
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // Order the lists so that those with the 'present' modifier (and 'alloc'
  // maps) come first; the sort is stable, so ties keep clause order.
  // NOTE(review): HasAllocs is computed from the RHS map type and HasAllocsR
  // from the LHS map type, the opposite of how the 'present' flags are
  // derived - confirm this is intended.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent = !MapModifiers.empty() &&
                      llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                        return K == clang::OMPC_MAP_MODIFIER_present;
                      });
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        !MapModifiers.empty() &&
        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
          return K == clang::OMPC_MAP_MODIFIER_present;
        });
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the base entry inside
  // DeclComponentLists, which is not modified after this point.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L only against the lists that follow it, so each unordered
    // pair is examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists in reverse for as long as the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            std::prev(It)
                ->getAssociatedExpression()
                ->getType()
                ->isPointerType())
          continue;
        // The list that was fully consumed is the base; the other one is
        // recorded as overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Strip pointer/array levels until the underlying type is reached.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists diverge at a field: fields of the same record are
          // ordered by field index, otherwise by whichever of the two comes
          // first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): It is dereferenced without checking against
          // Layout.end(); presumably one of the two fields is always present
          // in Layout - confirm.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared on every iteration, including those that were skipped because
    // the list was already handled as an overlapped base above.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one entry is appended to \a CombinedInfo; it is always flagged
/// TARGET_PARAM and has no user-defined mapper.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    // The size is that of the object 'this' points to.
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // A variable recorded in FirstPrivateDecls carries its own
    // implicit/explicit flag.
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end() &&
        VD->getType().isConstant(CGF.getContext())) {
      // Firstprivate variable of constant type: copy it into a registered
      // global copy and map that copy instead.
      llvm::Constant *Addr =
          CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
      // Copy the value of the original variable to the new global copy.
      CGF.Builder.CreateMemCpy(
          CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
          Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
          CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
      // Use new global variable as the base pointers.
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Addr);
      CombinedInfo.Pointers.push_back(Addr);
    } else {
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: the pointer of the
        // entry is the pointer value loaded through the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit the per-dimension descriptors for non-contiguous entries and store
/// the address of each descriptor array into the matching slot of
/// \a Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Descriptor element II is filled from the recorded values in reverse
      // order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that actually got a descriptor.
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
///
/// If \a MapExprs has no declaration, the default source location string is
/// returned. Otherwise the string is built from the presumed location of the
/// declaration and either the pretty-printed map expression or, when there
/// is no expression, the declaration name.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // NOTE(review): PLoc is used without checking that it is valid - confirm
    // the declaration location can always be resolved here.
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }

  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
9277 static void emitOffloadingArrays( 9278 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9279 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9280 bool IsNonContiguous = false) { 9281 CodeGenModule &CGM = CGF.CGM; 9282 ASTContext &Ctx = CGF.getContext(); 9283 9284 // Reset the array information. 9285 Info.clearArrayInfo(); 9286 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9287 9288 if (Info.NumberOfPtrs) { 9289 // Detect if we have any capture size requiring runtime evaluation of the 9290 // size so that a constant array could be eventually used. 9291 bool hasRuntimeEvaluationCaptureSize = false; 9292 for (llvm::Value *S : CombinedInfo.Sizes) 9293 if (!isa<llvm::Constant>(S)) { 9294 hasRuntimeEvaluationCaptureSize = true; 9295 break; 9296 } 9297 9298 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9299 QualType PointerArrayType = Ctx.getConstantArrayType( 9300 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9301 /*IndexTypeQuals=*/0); 9302 9303 Info.BasePointersArray = 9304 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9305 Info.PointersArray = 9306 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9307 Address MappersArray = 9308 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9309 Info.MappersArray = MappersArray.getPointer(); 9310 9311 // If we don't have any VLA types or other types that require runtime 9312 // evaluation, we can use a constant array for the map sizes, otherwise we 9313 // need to fill up the arrays as we do for the pointers. 
9314 QualType Int64Ty = 9315 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9316 if (hasRuntimeEvaluationCaptureSize) { 9317 QualType SizeArrayType = Ctx.getConstantArrayType( 9318 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9319 /*IndexTypeQuals=*/0); 9320 Info.SizesArray = 9321 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9322 } else { 9323 // We expect all the sizes to be constant, so we collect them to create 9324 // a constant array. 9325 SmallVector<llvm::Constant *, 16> ConstSizes; 9326 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9327 if (IsNonContiguous && 9328 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9329 ConstSizes.push_back(llvm::ConstantInt::get( 9330 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9331 } else { 9332 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9333 } 9334 } 9335 9336 auto *SizesArrayInit = llvm::ConstantArray::get( 9337 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9338 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9339 auto *SizesArrayGbl = new llvm::GlobalVariable( 9340 CGM.getModule(), SizesArrayInit->getType(), 9341 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9342 SizesArrayInit, Name); 9343 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9344 Info.SizesArray = SizesArrayGbl; 9345 } 9346 9347 // The map types are always constant so we don't need to generate code to 9348 // fill arrays. Instead, we create an array constant. 
9349 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9350 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9351 llvm::Constant *MapTypesArrayInit = 9352 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9353 std::string MaptypesName = 9354 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9355 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9356 CGM.getModule(), MapTypesArrayInit->getType(), 9357 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9358 MapTypesArrayInit, MaptypesName); 9359 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9360 Info.MapTypesArray = MapTypesArrayGbl; 9361 9362 // The information types are only built if there is debug information 9363 // requested. 9364 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9365 Info.MapNamesArray = llvm::Constant::getNullValue( 9366 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9367 } else { 9368 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9369 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9370 }; 9371 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9372 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9373 9374 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 9375 llvm::ArrayType::get( 9376 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), 9377 CombinedInfo.Exprs.size()), 9378 InfoMap); 9379 auto *MapNamesArrayGbl = new llvm::GlobalVariable( 9380 CGM.getModule(), MapNamesArrayInit->getType(), 9381 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9382 MapNamesArrayInit, 9383 CGM.getOpenMPRuntime().getName({"offload_mapnames"})); 9384 Info.MapNamesArray = MapNamesArrayGbl; 9385 } 9386 9387 // If there's a present map type modifier, it must not be applied to the end 9388 // of a region, so generate a separate map type array in that case. 
9389 if (Info.separateBeginEndCalls()) { 9390 bool EndMapTypesDiffer = false; 9391 for (uint64_t &Type : Mapping) { 9392 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9393 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9394 EndMapTypesDiffer = true; 9395 } 9396 } 9397 if (EndMapTypesDiffer) { 9398 MapTypesArrayInit = 9399 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9400 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9401 MapTypesArrayGbl = new llvm::GlobalVariable( 9402 CGM.getModule(), MapTypesArrayInit->getType(), 9403 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9404 MapTypesArrayInit, MaptypesName); 9405 MapTypesArrayGbl->setUnnamedAddr( 9406 llvm::GlobalValue::UnnamedAddr::Global); 9407 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9408 } 9409 } 9410 9411 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9412 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9413 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9414 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9415 Info.BasePointersArray, 0, I); 9416 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9417 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9418 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9419 CGF.Builder.CreateStore(BPVal, BPAddr); 9420 9421 if (Info.requiresDevicePointerInfo()) 9422 if (const ValueDecl *DevVD = 9423 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9424 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9425 9426 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9427 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9428 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9429 Info.PointersArray, 0, I); 9430 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9431 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9432 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9433 CGF.Builder.CreateStore(PVal, PAddr); 9434 9435 if (hasRuntimeEvaluationCaptureSize) { 
9436 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9437 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9438 Info.SizesArray, 9439 /*Idx0=*/0, 9440 /*Idx1=*/I); 9441 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9442 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9443 CGM.Int64Ty, 9444 /*isSigned=*/true), 9445 SAddr); 9446 } 9447 9448 // Fill up the mapper array. 9449 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9450 if (CombinedInfo.Mappers[I]) { 9451 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9452 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9453 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9454 Info.HasMapper = true; 9455 } 9456 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9457 CGF.Builder.CreateStore(MFunc, MAddr); 9458 } 9459 } 9460 9461 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9462 Info.NumberOfPtrs == 0) 9463 return; 9464 9465 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9466 } 9467 9468 namespace { 9469 /// Additional arguments for emitOffloadingArraysArgument function. 9470 struct ArgumentsOptions { 9471 bool ForEndCall = false; 9472 ArgumentsOptions() = default; 9473 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9474 }; 9475 } // namespace 9476 9477 /// Emit the arguments to be passed to the runtime library based on the 9478 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9479 /// ForEndCall, emit map types to be passed for the end of the region instead of 9480 /// the beginning. 
9481 static void emitOffloadingArraysArgument( 9482 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9483 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9484 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9485 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9486 const ArgumentsOptions &Options = ArgumentsOptions()) { 9487 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9488 "expected region end call to runtime only when end call is separate"); 9489 CodeGenModule &CGM = CGF.CGM; 9490 if (Info.NumberOfPtrs) { 9491 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9492 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9493 Info.BasePointersArray, 9494 /*Idx0=*/0, /*Idx1=*/0); 9495 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9496 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9497 Info.PointersArray, 9498 /*Idx0=*/0, 9499 /*Idx1=*/0); 9500 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9501 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9502 /*Idx0=*/0, /*Idx1=*/0); 9503 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9504 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9505 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9506 : Info.MapTypesArray, 9507 /*Idx0=*/0, 9508 /*Idx1=*/0); 9509 9510 // Only emit the mapper information arrays if debug information is 9511 // requested. 
9512 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9513 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9514 else 9515 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9516 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9517 Info.MapNamesArray, 9518 /*Idx0=*/0, 9519 /*Idx1=*/0); 9520 // If there is no user-defined mapper, set the mapper array to nullptr to 9521 // avoid an unnecessary data privatization 9522 if (!Info.HasMapper) 9523 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9524 else 9525 MappersArrayArg = 9526 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9527 } else { 9528 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9529 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9530 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9531 MapTypesArrayArg = 9532 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9533 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9534 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9535 } 9536 } 9537 9538 /// Check for inner distribute directive. 
9539 static const OMPExecutableDirective * 9540 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9541 const auto *CS = D.getInnermostCapturedStmt(); 9542 const auto *Body = 9543 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9544 const Stmt *ChildStmt = 9545 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9546 9547 if (const auto *NestedDir = 9548 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9549 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9550 switch (D.getDirectiveKind()) { 9551 case OMPD_target: 9552 if (isOpenMPDistributeDirective(DKind)) 9553 return NestedDir; 9554 if (DKind == OMPD_teams) { 9555 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9556 /*IgnoreCaptured=*/true); 9557 if (!Body) 9558 return nullptr; 9559 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9560 if (const auto *NND = 9561 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9562 DKind = NND->getDirectiveKind(); 9563 if (isOpenMPDistributeDirective(DKind)) 9564 return NND; 9565 } 9566 } 9567 return nullptr; 9568 case OMPD_target_teams: 9569 if (isOpenMPDistributeDirective(DKind)) 9570 return NestedDir; 9571 return nullptr; 9572 case OMPD_target_parallel: 9573 case OMPD_target_simd: 9574 case OMPD_target_parallel_for: 9575 case OMPD_target_parallel_for_simd: 9576 return nullptr; 9577 case OMPD_target_teams_distribute: 9578 case OMPD_target_teams_distribute_simd: 9579 case OMPD_target_teams_distribute_parallel_for: 9580 case OMPD_target_teams_distribute_parallel_for_simd: 9581 case OMPD_parallel: 9582 case OMPD_for: 9583 case OMPD_parallel_for: 9584 case OMPD_parallel_master: 9585 case OMPD_parallel_sections: 9586 case OMPD_for_simd: 9587 case OMPD_parallel_for_simd: 9588 case OMPD_cancel: 9589 case OMPD_cancellation_point: 9590 case OMPD_ordered: 9591 case OMPD_threadprivate: 9592 case OMPD_allocate: 9593 case OMPD_task: 9594 case OMPD_simd: 9595 case OMPD_tile: 9596 
case OMPD_sections: 9597 case OMPD_section: 9598 case OMPD_single: 9599 case OMPD_master: 9600 case OMPD_critical: 9601 case OMPD_taskyield: 9602 case OMPD_barrier: 9603 case OMPD_taskwait: 9604 case OMPD_taskgroup: 9605 case OMPD_atomic: 9606 case OMPD_flush: 9607 case OMPD_depobj: 9608 case OMPD_scan: 9609 case OMPD_teams: 9610 case OMPD_target_data: 9611 case OMPD_target_exit_data: 9612 case OMPD_target_enter_data: 9613 case OMPD_distribute: 9614 case OMPD_distribute_simd: 9615 case OMPD_distribute_parallel_for: 9616 case OMPD_distribute_parallel_for_simd: 9617 case OMPD_teams_distribute: 9618 case OMPD_teams_distribute_simd: 9619 case OMPD_teams_distribute_parallel_for: 9620 case OMPD_teams_distribute_parallel_for_simd: 9621 case OMPD_target_update: 9622 case OMPD_declare_simd: 9623 case OMPD_declare_variant: 9624 case OMPD_begin_declare_variant: 9625 case OMPD_end_declare_variant: 9626 case OMPD_declare_target: 9627 case OMPD_end_declare_target: 9628 case OMPD_declare_reduction: 9629 case OMPD_declare_mapper: 9630 case OMPD_taskloop: 9631 case OMPD_taskloop_simd: 9632 case OMPD_master_taskloop: 9633 case OMPD_master_taskloop_simd: 9634 case OMPD_parallel_master_taskloop: 9635 case OMPD_parallel_master_taskloop_simd: 9636 case OMPD_requires: 9637 case OMPD_unknown: 9638 default: 9639 llvm_unreachable("Unexpected directive."); 9640 } 9641 } 9642 9643 return nullptr; 9644 } 9645 9646 /// Emit the user-defined mapper function. The code generation follows the 9647 /// pattern in the example below. 9648 /// \code 9649 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9650 /// void *base, void *begin, 9651 /// int64_t size, int64_t type, 9652 /// void *name = nullptr) { 9653 /// // Allocate space for an array section first or add a base/begin for 9654 /// // pointer dereference. 
9655 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9656 /// !maptype.IsDelete) 9657 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9658 /// size*sizeof(Ty), clearToFromMember(type)); 9659 /// // Map members. 9660 /// for (unsigned i = 0; i < size; i++) { 9661 /// // For each component specified by this mapper: 9662 /// for (auto c : begin[i]->all_components) { 9663 /// if (c.hasMapper()) 9664 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9665 /// c.arg_type, c.arg_name); 9666 /// else 9667 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9668 /// c.arg_begin, c.arg_size, c.arg_type, 9669 /// c.arg_name); 9670 /// } 9671 /// } 9672 /// // Delete the array section. 9673 /// if (size > 1 && maptype.IsDelete) 9674 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9675 /// size*sizeof(Ty), clearToFromMember(type)); 9676 /// } 9677 /// \endcode 9678 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9679 CodeGenFunction *CGF) { 9680 if (UDMMap.count(D) > 0) 9681 return; 9682 ASTContext &C = CGM.getContext(); 9683 QualType Ty = D->getType(); 9684 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9685 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9686 auto *MapperVarDecl = 9687 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9688 SourceLocation Loc = D->getLocation(); 9689 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9690 9691 // Prepare mapper function arguments and attributes. 
9692 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9693 C.VoidPtrTy, ImplicitParamDecl::Other); 9694 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9695 ImplicitParamDecl::Other); 9696 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9697 C.VoidPtrTy, ImplicitParamDecl::Other); 9698 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9699 ImplicitParamDecl::Other); 9700 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9701 ImplicitParamDecl::Other); 9702 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9703 ImplicitParamDecl::Other); 9704 FunctionArgList Args; 9705 Args.push_back(&HandleArg); 9706 Args.push_back(&BaseArg); 9707 Args.push_back(&BeginArg); 9708 Args.push_back(&SizeArg); 9709 Args.push_back(&TypeArg); 9710 Args.push_back(&NameArg); 9711 const CGFunctionInfo &FnInfo = 9712 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9713 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9714 SmallString<64> TyStr; 9715 llvm::raw_svector_ostream Out(TyStr); 9716 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9717 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9718 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9719 Name, &CGM.getModule()); 9720 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9721 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9722 // Start the mapper function code generation. 9723 CodeGenFunction MapperCGF(CGM); 9724 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9725 // Compute the starting and end addresses of array elements. 9726 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9727 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9728 C.getPointerType(Int64Ty), Loc); 9729 // Prepare common arguments for array initiation and deletion. 
9730 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9731 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9732 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9733 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9734 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9735 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9736 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9737 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9738 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9739 // Convert the size in bytes into the number of array elements. 9740 Size = MapperCGF.Builder.CreateExactUDiv( 9741 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9742 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9743 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); 9744 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9745 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9746 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9747 C.getPointerType(Int64Ty), Loc); 9748 9749 // Emit array initiation if this is an array section and \p MapType indicates 9750 // that memory allocation is required. 9751 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9752 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9753 ElementSize, HeadBB, /*IsInit=*/true); 9754 9755 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9756 9757 // Emit the loop header block. 9758 MapperCGF.EmitBlock(HeadBB); 9759 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9760 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9761 // Evaluate whether the initial condition is satisfied. 
9762 llvm::Value *IsEmpty = 9763 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9764 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9765 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9766 9767 // Emit the loop body block. 9768 MapperCGF.EmitBlock(BodyBB); 9769 llvm::BasicBlock *LastBB = BodyBB; 9770 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9771 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9772 PtrPHI->addIncoming(PtrBegin, EntryBB); 9773 Address PtrCurrent = 9774 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9775 .getAlignment() 9776 .alignmentOfArrayElement(ElementSize)); 9777 // Privatize the declared variable of mapper to be the current array element. 9778 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9779 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; }); 9780 (void)Scope.Privatize(); 9781 9782 // Get map clause information. Fill up the arrays with all mapped variables. 9783 MappableExprsHandler::MapCombinedInfoTy Info; 9784 MappableExprsHandler MEHandler(*D, MapperCGF); 9785 MEHandler.generateAllInfoForMapper(Info); 9786 9787 // Call the runtime API __tgt_mapper_num_components to get the number of 9788 // pre-existing components. 9789 llvm::Value *OffloadingArgs[] = {Handle}; 9790 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9791 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9792 OMPRTL___tgt_mapper_num_components), 9793 OffloadingArgs); 9794 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9795 PreviousSize, 9796 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9797 9798 // Fill up the runtime mapper handle for all components. 
9799 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { 9800 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9801 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9802 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9803 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9804 llvm::Value *CurSizeArg = Info.Sizes[I]; 9805 llvm::Value *CurNameArg = 9806 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9807 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) 9808 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); 9809 9810 // Extract the MEMBER_OF field from the map type. 9811 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); 9812 llvm::Value *MemberMapType = 9813 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9814 9815 // Combine the map type inherited from user-defined mapper with that 9816 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9817 // bits of the \a MapType, which is the input argument of the mapper 9818 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9819 // bits of MemberMapType. 9820 // [OpenMP 5.0], 1.2.6. map-type decay. 
9821 // | alloc | to | from | tofrom | release | delete 9822 // ---------------------------------------------------------- 9823 // alloc | alloc | alloc | alloc | alloc | release | delete 9824 // to | alloc | to | alloc | to | release | delete 9825 // from | alloc | alloc | from | from | release | delete 9826 // tofrom | alloc | to | from | tofrom | release | delete 9827 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9828 MapType, 9829 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9830 MappableExprsHandler::OMP_MAP_FROM)); 9831 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9832 llvm::BasicBlock *AllocElseBB = 9833 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9834 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9835 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9836 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9837 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9838 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9839 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9840 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9841 MapperCGF.EmitBlock(AllocBB); 9842 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9843 MemberMapType, 9844 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9845 MappableExprsHandler::OMP_MAP_FROM))); 9846 MapperCGF.Builder.CreateBr(EndBB); 9847 MapperCGF.EmitBlock(AllocElseBB); 9848 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9849 LeftToFrom, 9850 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9851 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9852 // In case of to, clear OMP_MAP_FROM. 
9853 MapperCGF.EmitBlock(ToBB); 9854 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9855 MemberMapType, 9856 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9857 MapperCGF.Builder.CreateBr(EndBB); 9858 MapperCGF.EmitBlock(ToElseBB); 9859 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9860 LeftToFrom, 9861 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9862 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9863 // In case of from, clear OMP_MAP_TO. 9864 MapperCGF.EmitBlock(FromBB); 9865 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9866 MemberMapType, 9867 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9868 // In case of tofrom, do nothing. 9869 MapperCGF.EmitBlock(EndBB); 9870 LastBB = EndBB; 9871 llvm::PHINode *CurMapType = 9872 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9873 CurMapType->addIncoming(AllocMapType, AllocBB); 9874 CurMapType->addIncoming(ToMapType, ToBB); 9875 CurMapType->addIncoming(FromMapType, FromBB); 9876 CurMapType->addIncoming(MemberMapType, ToElseBB); 9877 9878 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9879 CurSizeArg, CurMapType, CurNameArg}; 9880 if (Info.Mappers[I]) { 9881 // Call the corresponding mapper function. 9882 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( 9883 cast<OMPDeclareMapperDecl>(Info.Mappers[I])); 9884 assert(MapperFunc && "Expect a valid mapper function is available."); 9885 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); 9886 } else { 9887 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9888 // data structure. 9889 MapperCGF.EmitRuntimeCall( 9890 OMPBuilder.getOrCreateRuntimeFunction( 9891 CGM.getModule(), OMPRTL___tgt_push_mapper_component), 9892 OffloadingArgs); 9893 } 9894 } 9895 9896 // Update the pointer to point to the next element that needs to be mapped, 9897 // and check whether we have mapped all elements. 
9898 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9899 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9900 PtrPHI->addIncoming(PtrNext, LastBB); 9901 llvm::Value *IsDone = 9902 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9903 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9904 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9905 9906 MapperCGF.EmitBlock(ExitBB); 9907 // Emit array deletion if this is an array section and \p MapType indicates 9908 // that deletion is required. 9909 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9910 ElementSize, DoneBB, /*IsInit=*/false); 9911 9912 // Emit the function exit block. 9913 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9914 MapperCGF.FinishFunction(); 9915 UDMMap.try_emplace(D, Fn); 9916 if (CGF) { 9917 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9918 Decls.second.push_back(D); 9919 } 9920 } 9921 9922 /// Emit the array initialization or deletion portion for user-defined mapper 9923 /// code generation. First, it evaluates whether an array section is mapped and 9924 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9925 /// true, and \a MapType indicates to not delete this array, array 9926 /// initialization code is generated. If \a IsInit is false, and \a MapType 9927 /// indicates to not this array, array deletion code is generated. 9928 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9929 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9930 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9931 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9932 StringRef Prefix = IsInit ? ".init" : ".del"; 9933 9934 // Evaluate if this is an array section. 
9935 llvm::BasicBlock *BodyBB = 9936 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9937 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( 9938 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9939 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9940 MapType, 9941 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9942 llvm::Value *DeleteCond; 9943 llvm::Value *Cond; 9944 if (IsInit) { 9945 // base != begin? 9946 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull( 9947 MapperCGF.Builder.CreatePtrDiff(Base, Begin)); 9948 // IsPtrAndObj? 9949 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( 9950 MapType, 9951 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ)); 9952 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); 9953 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); 9954 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); 9955 DeleteCond = MapperCGF.Builder.CreateIsNull( 9956 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9957 } else { 9958 Cond = IsArray; 9959 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9960 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9961 } 9962 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); 9963 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); 9964 9965 MapperCGF.EmitBlock(BodyBB); 9966 // Get the array size by multiplying element size and element number (i.e., \p 9967 // Size). 9968 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9969 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9970 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9971 // memory allocation/deletion purpose only. 
9972 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9973 MapType, 9974 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9975 MappableExprsHandler::OMP_MAP_FROM | 9976 MappableExprsHandler::OMP_MAP_MEMBER_OF))); 9977 llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9978 9979 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9980 // data structure. 9981 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, 9982 ArraySize, MapTypeArg, MapNameArg}; 9983 MapperCGF.EmitRuntimeCall( 9984 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9985 OMPRTL___tgt_push_mapper_component), 9986 OffloadingArgs); 9987 } 9988 9989 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( 9990 const OMPDeclareMapperDecl *D) { 9991 auto I = UDMMap.find(D); 9992 if (I != UDMMap.end()) 9993 return I->second; 9994 emitUserDefinedMapper(D); 9995 return UDMMap.lookup(D); 9996 } 9997 9998 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9999 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10000 llvm::Value *DeviceID, 10001 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10002 const OMPLoopDirective &D)> 10003 SizeEmitter) { 10004 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 10005 const OMPExecutableDirective *TD = &D; 10006 // Get nested teams distribute kind directive, if any. 
10007 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10008 TD = getNestedDistributeDirective(CGM.getContext(), D); 10009 if (!TD) 10010 return; 10011 const auto *LD = cast<OMPLoopDirective>(TD); 10012 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10013 PrePostActionTy &) { 10014 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10015 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10016 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10017 CGF.EmitRuntimeCall( 10018 OMPBuilder.getOrCreateRuntimeFunction( 10019 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10020 Args); 10021 } 10022 }; 10023 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10024 } 10025 10026 void CGOpenMPRuntime::emitTargetCall( 10027 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10028 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10029 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10030 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10031 const OMPLoopDirective &D)> 10032 SizeEmitter) { 10033 if (!CGF.HaveInsertPoint()) 10034 return; 10035 10036 assert(OutlinedFn && "Invalid outlined function!"); 10037 10038 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10039 D.hasClausesOfKind<OMPNowaitClause>(); 10040 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10041 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10042 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10043 PrePostActionTy &) { 10044 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10045 }; 10046 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10047 10048 CodeGenFunction::OMPTargetDataInfo InputInfo; 10049 llvm::Value *MapTypesArray = nullptr; 10050 llvm::Value *MapNamesArray = nullptr; 10051 // Fill up the pointer arrays and transfer execution to the device. 
10052 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10053 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10054 &CapturedVars, 10055 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10056 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10057 // Reverse offloading is not supported, so just execute on the host. 10058 if (RequiresOuterTask) { 10059 CapturedVars.clear(); 10060 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10061 } 10062 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10063 return; 10064 } 10065 10066 // On top of the arrays that were filled up, the target offloading call 10067 // takes as arguments the device id as well as the host pointer. The host 10068 // pointer is used by the runtime library to identify the current target 10069 // region, so it only has to be unique and not necessarily point to 10070 // anything. It could be the pointer to the outlined function that 10071 // implements the target region, but we aren't using that so that the 10072 // compiler doesn't need to keep that, and could therefore inline the host 10073 // function if proven worthwhile during optimization. 10074 10075 // From this point on, we need to have an ID of the target region defined. 10076 assert(OutlinedFnID && "Invalid outlined function ID!"); 10077 10078 // Emit device ID if any. 10079 llvm::Value *DeviceID; 10080 if (Device.getPointer()) { 10081 assert((Device.getInt() == OMPC_DEVICE_unknown || 10082 Device.getInt() == OMPC_DEVICE_device_num) && 10083 "Expected device_num modifier."); 10084 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10085 DeviceID = 10086 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10087 } else { 10088 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10089 } 10090 10091 // Emit the number of elements in the offloading arrays. 
10092 llvm::Value *PointerNum = 10093 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10094 10095 // Return value of the runtime offloading call. 10096 llvm::Value *Return; 10097 10098 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10099 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10100 10101 // Source location for the ident struct 10102 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10103 10104 // Emit tripcount for the target loop-based directive. 10105 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10106 10107 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10108 // The target region is an outlined function launched by the runtime 10109 // via calls __tgt_target() or __tgt_target_teams(). 10110 // 10111 // __tgt_target() launches a target region with one team and one thread, 10112 // executing a serial region. This master thread may in turn launch 10113 // more threads within its team upon encountering a parallel region, 10114 // however, no additional teams can be launched on the device. 10115 // 10116 // __tgt_target_teams() launches a target region with one or more teams, 10117 // each with one or more threads. This call is required for target 10118 // constructs such as: 10119 // 'target teams' 10120 // 'target' / 'teams' 10121 // 'target teams distribute parallel for' 10122 // 'target parallel' 10123 // and so on. 10124 // 10125 // Note that on the host and CPU targets, the runtime implementation of 10126 // these calls simply call the outlined function without forking threads. 10127 // The outlined functions themselves have runtime calls to 10128 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10129 // the compiler in emitTeamsCall() and emitParallelCall(). 
10130 // 10131 // In contrast, on the NVPTX target, the implementation of 10132 // __tgt_target_teams() launches a GPU kernel with the requested number 10133 // of teams and threads so no additional calls to the runtime are required. 10134 if (NumTeams) { 10135 // If we have NumTeams defined this means that we have an enclosed teams 10136 // region. Therefore we also expect to have NumThreads defined. These two 10137 // values should be defined in the presence of a teams directive, 10138 // regardless of having any clauses associated. If the user is using teams 10139 // but no clauses, these two values will be the default that should be 10140 // passed to the runtime library - a 32-bit integer with the value zero. 10141 assert(NumThreads && "Thread limit expression should be available along " 10142 "with number of teams."); 10143 llvm::Value *OffloadingArgs[] = {RTLoc, 10144 DeviceID, 10145 OutlinedFnID, 10146 PointerNum, 10147 InputInfo.BasePointersArray.getPointer(), 10148 InputInfo.PointersArray.getPointer(), 10149 InputInfo.SizesArray.getPointer(), 10150 MapTypesArray, 10151 MapNamesArray, 10152 InputInfo.MappersArray.getPointer(), 10153 NumTeams, 10154 NumThreads}; 10155 Return = CGF.EmitRuntimeCall( 10156 OMPBuilder.getOrCreateRuntimeFunction( 10157 CGM.getModule(), HasNowait 10158 ? OMPRTL___tgt_target_teams_nowait_mapper 10159 : OMPRTL___tgt_target_teams_mapper), 10160 OffloadingArgs); 10161 } else { 10162 llvm::Value *OffloadingArgs[] = {RTLoc, 10163 DeviceID, 10164 OutlinedFnID, 10165 PointerNum, 10166 InputInfo.BasePointersArray.getPointer(), 10167 InputInfo.PointersArray.getPointer(), 10168 InputInfo.SizesArray.getPointer(), 10169 MapTypesArray, 10170 MapNamesArray, 10171 InputInfo.MappersArray.getPointer()}; 10172 Return = CGF.EmitRuntimeCall( 10173 OMPBuilder.getOrCreateRuntimeFunction( 10174 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10175 : OMPRTL___tgt_target_mapper), 10176 OffloadingArgs); 10177 } 10178 10179 // Check the error code and execute the host version if required. 10180 llvm::BasicBlock *OffloadFailedBlock = 10181 CGF.createBasicBlock("omp_offload.failed"); 10182 llvm::BasicBlock *OffloadContBlock = 10183 CGF.createBasicBlock("omp_offload.cont"); 10184 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10185 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10186 10187 CGF.EmitBlock(OffloadFailedBlock); 10188 if (RequiresOuterTask) { 10189 CapturedVars.clear(); 10190 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10191 } 10192 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10193 CGF.EmitBranch(OffloadContBlock); 10194 10195 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10196 }; 10197 10198 // Notify that the host version must be executed. 10199 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10200 RequiresOuterTask](CodeGenFunction &CGF, 10201 PrePostActionTy &) { 10202 if (RequiresOuterTask) { 10203 CapturedVars.clear(); 10204 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10205 } 10206 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10207 }; 10208 10209 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10210 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10211 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10212 // Fill up the arrays with all the captured variables. 10213 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10214 10215 // Get mappable expression information. 
10216 MappableExprsHandler MEHandler(D, CGF); 10217 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10218 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10219 10220 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10221 auto *CV = CapturedVars.begin(); 10222 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10223 CE = CS.capture_end(); 10224 CI != CE; ++CI, ++RI, ++CV) { 10225 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10226 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10227 10228 // VLA sizes are passed to the outlined region by copy and do not have map 10229 // information associated. 10230 if (CI->capturesVariableArrayType()) { 10231 CurInfo.Exprs.push_back(nullptr); 10232 CurInfo.BasePointers.push_back(*CV); 10233 CurInfo.Pointers.push_back(*CV); 10234 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10235 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10236 // Copy to the device as an argument. No need to retrieve it. 10237 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10238 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10239 MappableExprsHandler::OMP_MAP_IMPLICIT); 10240 CurInfo.Mappers.push_back(nullptr); 10241 } else { 10242 // If we have any information in the map clause, we use it, otherwise we 10243 // just do a default mapping. 10244 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10245 if (!CI->capturesThis()) 10246 MappedVarSet.insert(CI->getCapturedVar()); 10247 else 10248 MappedVarSet.insert(nullptr); 10249 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10250 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10251 // Generate correct mapping for variables captured by reference in 10252 // lambdas. 
10253 if (CI->capturesVariable()) 10254 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10255 CurInfo, LambdaPointers); 10256 } 10257 // We expect to have at least an element of information for this capture. 10258 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10259 "Non-existing map pointer for capture!"); 10260 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10261 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10262 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10263 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10264 "Inconsistent map information sizes!"); 10265 10266 // If there is an entry in PartialStruct it means we have a struct with 10267 // individual members mapped. Emit an extra combined entry. 10268 if (PartialStruct.Base.isValid()) { 10269 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10270 MEHandler.emitCombinedEntry( 10271 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10272 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10273 } 10274 10275 // We need to append the results of this capture to what we already have. 10276 CombinedInfo.append(CurInfo); 10277 } 10278 // Adjust MEMBER_OF flags for the lambdas captures. 10279 MEHandler.adjustMemberOfForLambdaCaptures( 10280 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10281 CombinedInfo.Types); 10282 // Map any list items in a map clause that were not captures because they 10283 // weren't referenced within the construct. 10284 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10285 10286 TargetDataInfo Info; 10287 // Fill up the arrays and create the arguments. 
10288 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10289 emitOffloadingArraysArgument( 10290 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10291 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10292 {/*ForEndTask=*/false}); 10293 10294 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10295 InputInfo.BasePointersArray = 10296 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10297 InputInfo.PointersArray = 10298 Address(Info.PointersArray, CGM.getPointerAlign()); 10299 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10300 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10301 MapTypesArray = Info.MapTypesArray; 10302 MapNamesArray = Info.MapNamesArray; 10303 if (RequiresOuterTask) 10304 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10305 else 10306 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10307 }; 10308 10309 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10310 CodeGenFunction &CGF, PrePostActionTy &) { 10311 if (RequiresOuterTask) { 10312 CodeGenFunction::OMPTargetDataInfo InputInfo; 10313 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10314 } else { 10315 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10316 } 10317 }; 10318 10319 // If we have a target function ID it means that we need to support 10320 // offloading, otherwise, just execute on the host. We need to execute on host 10321 // regardless of the conditional in the if clause if, e.g., the user do not 10322 // specify target triples. 
10323 if (OutlinedFnID) { 10324 if (IfCond) { 10325 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10326 } else { 10327 RegionCodeGenTy ThenRCG(TargetThenGen); 10328 ThenRCG(CGF); 10329 } 10330 } else { 10331 RegionCodeGenTy ElseRCG(TargetElseGen); 10332 ElseRCG(CGF); 10333 } 10334 } 10335 10336 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10337 StringRef ParentName) { 10338 if (!S) 10339 return; 10340 10341 // Codegen OMP target directives that offload compute to the device. 10342 bool RequiresDeviceCodegen = 10343 isa<OMPExecutableDirective>(S) && 10344 isOpenMPTargetExecutionDirective( 10345 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10346 10347 if (RequiresDeviceCodegen) { 10348 const auto &E = *cast<OMPExecutableDirective>(S); 10349 unsigned DeviceID; 10350 unsigned FileID; 10351 unsigned Line; 10352 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10353 FileID, Line); 10354 10355 // Is this a target region that should not be emitted as an entry point? If 10356 // so just signal we are done with this target region. 
10357 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10358 ParentName, Line)) 10359 return; 10360 10361 switch (E.getDirectiveKind()) { 10362 case OMPD_target: 10363 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10364 cast<OMPTargetDirective>(E)); 10365 break; 10366 case OMPD_target_parallel: 10367 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10368 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10369 break; 10370 case OMPD_target_teams: 10371 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10372 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10373 break; 10374 case OMPD_target_teams_distribute: 10375 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10376 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10377 break; 10378 case OMPD_target_teams_distribute_simd: 10379 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10380 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10381 break; 10382 case OMPD_target_parallel_for: 10383 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10384 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10385 break; 10386 case OMPD_target_parallel_for_simd: 10387 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10388 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10389 break; 10390 case OMPD_target_simd: 10391 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10392 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10393 break; 10394 case OMPD_target_teams_distribute_parallel_for: 10395 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10396 CGM, ParentName, 10397 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10398 break; 10399 case OMPD_target_teams_distribute_parallel_for_simd: 10400 CodeGenFunction:: 10401 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10402 CGM, ParentName, 10403 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10404 break; 10405 case OMPD_parallel: 10406 case OMPD_for: 10407 case OMPD_parallel_for: 10408 case OMPD_parallel_master: 10409 case OMPD_parallel_sections: 10410 case OMPD_for_simd: 10411 case OMPD_parallel_for_simd: 10412 case OMPD_cancel: 10413 case OMPD_cancellation_point: 10414 case OMPD_ordered: 10415 case OMPD_threadprivate: 10416 case OMPD_allocate: 10417 case OMPD_task: 10418 case OMPD_simd: 10419 case OMPD_tile: 10420 case OMPD_sections: 10421 case OMPD_section: 10422 case OMPD_single: 10423 case OMPD_master: 10424 case OMPD_critical: 10425 case OMPD_taskyield: 10426 case OMPD_barrier: 10427 case OMPD_taskwait: 10428 case OMPD_taskgroup: 10429 case OMPD_atomic: 10430 case OMPD_flush: 10431 case OMPD_depobj: 10432 case OMPD_scan: 10433 case OMPD_teams: 10434 case OMPD_target_data: 10435 case OMPD_target_exit_data: 10436 case OMPD_target_enter_data: 10437 case OMPD_distribute: 10438 case OMPD_distribute_simd: 10439 case OMPD_distribute_parallel_for: 10440 case OMPD_distribute_parallel_for_simd: 10441 case OMPD_teams_distribute: 10442 case OMPD_teams_distribute_simd: 10443 case OMPD_teams_distribute_parallel_for: 10444 case OMPD_teams_distribute_parallel_for_simd: 10445 case OMPD_target_update: 10446 case OMPD_declare_simd: 10447 case OMPD_declare_variant: 10448 case OMPD_begin_declare_variant: 10449 case OMPD_end_declare_variant: 10450 case OMPD_declare_target: 10451 case OMPD_end_declare_target: 10452 case OMPD_declare_reduction: 10453 case OMPD_declare_mapper: 10454 case OMPD_taskloop: 10455 case OMPD_taskloop_simd: 10456 case OMPD_master_taskloop: 10457 case OMPD_master_taskloop_simd: 10458 case OMPD_parallel_master_taskloop: 10459 case OMPD_parallel_master_taskloop_simd: 10460 case OMPD_requires: 10461 case OMPD_unknown: 10462 default: 10463 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10464 } 10465 return; 10466 } 10467 10468 if (const auto *E = 
dyn_cast<OMPExecutableDirective>(S)) { 10469 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10470 return; 10471 10472 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10473 return; 10474 } 10475 10476 // If this is a lambda function, look into its body. 10477 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10478 S = L->getBody(); 10479 10480 // Keep looking for target regions recursively. 10481 for (const Stmt *II : S->children()) 10482 scanForTargetRegionsFunctions(II, ParentName); 10483 } 10484 10485 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10486 // If emitting code for the host, we do not process FD here. Instead we do 10487 // the normal code generation. 10488 if (!CGM.getLangOpts().OpenMPIsDevice) { 10489 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10490 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10491 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10492 // Do not emit device_type(nohost) functions for the host. 10493 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10494 return true; 10495 } 10496 return false; 10497 } 10498 10499 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10500 // Try to detect target regions in the function. 10501 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10502 StringRef Name = CGM.getMangledName(GD); 10503 scanForTargetRegionsFunctions(FD->getBody(), Name); 10504 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10505 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10506 // Do not emit device_type(nohost) functions for the host. 10507 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10508 return true; 10509 } 10510 10511 // Do not to emit function if it is not marked as declare target. 
10512 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10513 AlreadyEmittedTargetDecls.count(VD) == 0; 10514 } 10515 10516 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10517 if (!CGM.getLangOpts().OpenMPIsDevice) 10518 return false; 10519 10520 // Check if there are Ctors/Dtors in this declaration and look for target 10521 // regions in it. We use the complete variant to produce the kernel name 10522 // mangling. 10523 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10524 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10525 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10526 StringRef ParentName = 10527 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10528 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10529 } 10530 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10531 StringRef ParentName = 10532 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10533 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10534 } 10535 } 10536 10537 // Do not to emit variable if it is not marked as declare target. 
10538 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10539 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10540 cast<VarDecl>(GD.getDecl())); 10541 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10542 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10543 HasRequiresUnifiedSharedMemory)) { 10544 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10545 return true; 10546 } 10547 return false; 10548 } 10549 10550 llvm::Constant * 10551 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10552 const VarDecl *VD) { 10553 assert(VD->getType().isConstant(CGM.getContext()) && 10554 "Expected constant variable."); 10555 StringRef VarName; 10556 llvm::Constant *Addr; 10557 llvm::GlobalValue::LinkageTypes Linkage; 10558 QualType Ty = VD->getType(); 10559 SmallString<128> Buffer; 10560 { 10561 unsigned DeviceID; 10562 unsigned FileID; 10563 unsigned Line; 10564 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10565 FileID, Line); 10566 llvm::raw_svector_ostream OS(Buffer); 10567 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10568 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10569 VarName = OS.str(); 10570 } 10571 Linkage = llvm::GlobalValue::InternalLinkage; 10572 Addr = 10573 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10574 getDefaultFirstprivateAddressSpace()); 10575 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10576 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10577 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10578 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10579 VarName, Addr, VarSize, 10580 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10581 return Addr; 10582 } 10583 10584 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10585 llvm::Constant *Addr) { 10586 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10587 
!CGM.getLangOpts().OpenMPIsDevice) 10588 return; 10589 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10590 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10591 if (!Res) { 10592 if (CGM.getLangOpts().OpenMPIsDevice) { 10593 // Register non-target variables being emitted in device code (debug info 10594 // may cause this). 10595 StringRef VarName = CGM.getMangledName(VD); 10596 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10597 } 10598 return; 10599 } 10600 // Register declare target variables. 10601 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10602 StringRef VarName; 10603 CharUnits VarSize; 10604 llvm::GlobalValue::LinkageTypes Linkage; 10605 10606 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10607 !HasRequiresUnifiedSharedMemory) { 10608 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10609 VarName = CGM.getMangledName(VD); 10610 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10611 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10612 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10613 } else { 10614 VarSize = CharUnits::Zero(); 10615 } 10616 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10617 // Temp solution to prevent optimizations of the internal variables. 
10618 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10619 std::string RefName = getName({VarName, "ref"}); 10620 if (!CGM.GetGlobalValue(RefName)) { 10621 llvm::Constant *AddrRef = 10622 getOrCreateInternalVariable(Addr->getType(), RefName); 10623 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10624 GVAddrRef->setConstant(/*Val=*/true); 10625 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10626 GVAddrRef->setInitializer(Addr); 10627 CGM.addCompilerUsedGlobal(GVAddrRef); 10628 } 10629 } 10630 } else { 10631 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10632 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10633 HasRequiresUnifiedSharedMemory)) && 10634 "Declare target attribute must link or to with unified memory."); 10635 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10636 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10637 else 10638 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10639 10640 if (CGM.getLangOpts().OpenMPIsDevice) { 10641 VarName = Addr->getName(); 10642 Addr = nullptr; 10643 } else { 10644 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10645 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10646 } 10647 VarSize = CGM.getPointerSize(); 10648 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10649 } 10650 10651 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10652 VarName, Addr, VarSize, Flags, Linkage); 10653 } 10654 10655 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10656 if (isa<FunctionDecl>(GD.getDecl()) || 10657 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10658 return emitTargetFunctions(GD); 10659 10660 return emitTargetGlobalVariable(GD); 10661 } 10662 10663 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10664 for (const VarDecl *VD : DeferredGlobalVariables) { 10665 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10666 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10667 if (!Res) 
10668 continue; 10669 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10670 !HasRequiresUnifiedSharedMemory) { 10671 CGM.EmitGlobal(VD); 10672 } else { 10673 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10674 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10675 HasRequiresUnifiedSharedMemory)) && 10676 "Expected link clause or to clause with unified memory."); 10677 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10678 } 10679 } 10680 } 10681 10682 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10683 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10684 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10685 " Expected target-based directive."); 10686 } 10687 10688 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10689 for (const OMPClause *Clause : D->clauselists()) { 10690 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10691 HasRequiresUnifiedSharedMemory = true; 10692 } else if (const auto *AC = 10693 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10694 switch (AC->getAtomicDefaultMemOrderKind()) { 10695 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10696 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10697 break; 10698 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10699 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10700 break; 10701 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10702 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10703 break; 10704 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10705 break; 10706 } 10707 } 10708 } 10709 } 10710 10711 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10712 return RequiresAtomicOrdering; 10713 } 10714 10715 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10716 LangAS &AS) { 10717 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10718 return false; 10719 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10720 
switch(A->getAllocatorType()) { 10721 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10722 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10723 // Not supported, fallback to the default mem space. 10724 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10725 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10726 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10727 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10728 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10729 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10730 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10731 AS = LangAS::Default; 10732 return true; 10733 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10734 llvm_unreachable("Expected predefined allocator for the variables with the " 10735 "static storage."); 10736 } 10737 return false; 10738 } 10739 10740 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10741 return HasRequiresUnifiedSharedMemory; 10742 } 10743 10744 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10745 CodeGenModule &CGM) 10746 : CGM(CGM) { 10747 if (CGM.getLangOpts().OpenMPIsDevice) { 10748 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10749 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10750 } 10751 } 10752 10753 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10754 if (CGM.getLangOpts().OpenMPIsDevice) 10755 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10756 } 10757 10758 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10759 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10760 return true; 10761 10762 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10763 // Do not to emit function if it is marked as declare target as it was already 10764 // emitted. 
10765 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10766 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10767 if (auto *F = dyn_cast_or_null<llvm::Function>( 10768 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10769 return !F->isDeclaration(); 10770 return false; 10771 } 10772 return true; 10773 } 10774 10775 return !AlreadyEmittedTargetDecls.insert(D).second; 10776 } 10777 10778 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10779 // If we don't have entries or if we are emitting code for the device, we 10780 // don't need to do anything. 10781 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10782 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10783 (OffloadEntriesInfoManager.empty() && 10784 !HasEmittedDeclareTargetRegion && 10785 !HasEmittedTargetRegion)) 10786 return nullptr; 10787 10788 // Create and register the function that handles the requires directives. 10789 ASTContext &C = CGM.getContext(); 10790 10791 llvm::Function *RequiresRegFn; 10792 { 10793 CodeGenFunction CGF(CGM); 10794 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10795 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10796 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10797 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10798 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10799 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10800 // TODO: check for other requires clauses. 10801 // The requires directive takes effect only when a target region is 10802 // present in the compilation unit. Otherwise it is ignored and not 10803 // passed to the runtime. This avoids the runtime from throwing an error 10804 // for mismatching requires clauses across compilation units that don't 10805 // contain at least 1 target region. 
10806 assert((HasEmittedTargetRegion || 10807 HasEmittedDeclareTargetRegion || 10808 !OffloadEntriesInfoManager.empty()) && 10809 "Target or declare target region expected."); 10810 if (HasRequiresUnifiedSharedMemory) 10811 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10812 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10813 CGM.getModule(), OMPRTL___tgt_register_requires), 10814 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10815 CGF.FinishFunction(); 10816 } 10817 return RequiresRegFn; 10818 } 10819 10820 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10821 const OMPExecutableDirective &D, 10822 SourceLocation Loc, 10823 llvm::Function *OutlinedFn, 10824 ArrayRef<llvm::Value *> CapturedVars) { 10825 if (!CGF.HaveInsertPoint()) 10826 return; 10827 10828 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10829 CodeGenFunction::RunCleanupsScope Scope(CGF); 10830 10831 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10832 llvm::Value *Args[] = { 10833 RTLoc, 10834 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10835 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10836 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10837 RealArgs.append(std::begin(Args), std::end(Args)); 10838 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10839 10840 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10841 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10842 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10843 } 10844 10845 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10846 const Expr *NumTeams, 10847 const Expr *ThreadLimit, 10848 SourceLocation Loc) { 10849 if (!CGF.HaveInsertPoint()) 10850 return; 10851 10852 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10853 10854 llvm::Value *NumTeamsVal = 10855 NumTeams 10856 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the runtime calls that open and close a target data environment for
/// directive \p D, with the region body produced by \p CodeGen.
///
/// The opening code calls __tgt_target_data_begin_mapper and the closing code
/// calls __tgt_target_data_end_mapper, both using the offloading arrays
/// described by \p Info. When \p IfCond is given, both calls are guarded by it
/// through emitIfClause. When \p Device is given it is evaluated and cast to a
/// signed 64-bit device id; otherwise OMP_DEVICEID_UNDEF is passed.
///
/// Where the body is emitted depends on Info.CaptureDeviceAddrMap:
///  - empty: once, between the begin and end code, with the no-op action
///    NoPrivAction installed;
///  - non-empty: inside the "then" code right after the begin call, and, if
///    \p IfCond is given, again inside the "else" code with NoPrivAction
///    installed.
///
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Same argument set as for the begin call, but requested with the
    // "for end call" option set.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the data environment: conditional if an 'if' clause is
  // present, unconditional otherwise.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the data environment, guarded the same way as the opening.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
11046 llvm::Value *DeviceID = nullptr; 11047 if (Device) { 11048 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11049 CGF.Int64Ty, /*isSigned=*/true); 11050 } else { 11051 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11052 } 11053 11054 // Emit the number of elements in the offloading arrays. 11055 llvm::Constant *PointerNum = 11056 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11057 11058 // Source location for the ident struct 11059 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11060 11061 llvm::Value *OffloadingArgs[] = {RTLoc, 11062 DeviceID, 11063 PointerNum, 11064 InputInfo.BasePointersArray.getPointer(), 11065 InputInfo.PointersArray.getPointer(), 11066 InputInfo.SizesArray.getPointer(), 11067 MapTypesArray, 11068 MapNamesArray, 11069 InputInfo.MappersArray.getPointer()}; 11070 11071 // Select the right runtime function call for each standalone 11072 // directive. 11073 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11074 RuntimeFunction RTLFn; 11075 switch (D.getDirectiveKind()) { 11076 case OMPD_target_enter_data: 11077 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11078 : OMPRTL___tgt_target_data_begin_mapper; 11079 break; 11080 case OMPD_target_exit_data: 11081 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11082 : OMPRTL___tgt_target_data_end_mapper; 11083 break; 11084 case OMPD_target_update: 11085 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11086 : OMPRTL___tgt_target_data_update_mapper; 11087 break; 11088 case OMPD_parallel: 11089 case OMPD_for: 11090 case OMPD_parallel_for: 11091 case OMPD_parallel_master: 11092 case OMPD_parallel_sections: 11093 case OMPD_for_simd: 11094 case OMPD_parallel_for_simd: 11095 case OMPD_cancel: 11096 case OMPD_cancellation_point: 11097 case OMPD_ordered: 11098 case OMPD_threadprivate: 11099 case OMPD_allocate: 11100 case OMPD_task: 11101 case OMPD_simd: 11102 case OMPD_tile: 11103 case OMPD_sections: 11104 case OMPD_section: 11105 case OMPD_single: 11106 case OMPD_master: 11107 case OMPD_critical: 11108 case OMPD_taskyield: 11109 case OMPD_barrier: 11110 case OMPD_taskwait: 11111 case OMPD_taskgroup: 11112 case OMPD_atomic: 11113 case OMPD_flush: 11114 case OMPD_depobj: 11115 case OMPD_scan: 11116 case OMPD_teams: 11117 case OMPD_target_data: 11118 case OMPD_distribute: 11119 case OMPD_distribute_simd: 11120 case OMPD_distribute_parallel_for: 11121 case OMPD_distribute_parallel_for_simd: 11122 case OMPD_teams_distribute: 11123 case OMPD_teams_distribute_simd: 11124 case OMPD_teams_distribute_parallel_for: 11125 case OMPD_teams_distribute_parallel_for_simd: 11126 case OMPD_declare_simd: 11127 case OMPD_declare_variant: 11128 case OMPD_begin_declare_variant: 11129 case OMPD_end_declare_variant: 11130 case OMPD_declare_target: 11131 case OMPD_end_declare_target: 11132 case OMPD_declare_reduction: 11133 case OMPD_declare_mapper: 11134 case OMPD_taskloop: 11135 case OMPD_taskloop_simd: 11136 case OMPD_master_taskloop: 11137 case OMPD_master_taskloop_simd: 11138 case OMPD_parallel_master_taskloop: 11139 case OMPD_parallel_master_taskloop_simd: 11140 case OMPD_target: 11141 case OMPD_target_simd: 11142 case OMPD_target_teams_distribute: 11143 case OMPD_target_teams_distribute_simd: 11144 case OMPD_target_teams_distribute_parallel_for: 11145 case OMPD_target_teams_distribute_parallel_for_simd: 11146 case OMPD_target_teams: 11147 
case OMPD_target_parallel: 11148 case OMPD_target_parallel_for: 11149 case OMPD_target_parallel_for_simd: 11150 case OMPD_requires: 11151 case OMPD_unknown: 11152 default: 11153 llvm_unreachable("Unexpected standalone target data directive."); 11154 break; 11155 } 11156 CGF.EmitRuntimeCall( 11157 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11158 OffloadingArgs); 11159 }; 11160 11161 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11162 &MapNamesArray](CodeGenFunction &CGF, 11163 PrePostActionTy &) { 11164 // Fill up the arrays with all the mapped variables. 11165 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11166 11167 // Get map clause information. 11168 MappableExprsHandler MEHandler(D, CGF); 11169 MEHandler.generateAllInfo(CombinedInfo); 11170 11171 TargetDataInfo Info; 11172 // Fill up the arrays and create the arguments. 11173 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11174 /*IsNonContiguous=*/true); 11175 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11176 D.hasClausesOfKind<OMPNowaitClause>(); 11177 emitOffloadingArraysArgument( 11178 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11179 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11180 {/*ForEndTask=*/false}); 11181 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11182 InputInfo.BasePointersArray = 11183 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11184 InputInfo.PointersArray = 11185 Address(Info.PointersArray, CGM.getPointerAlign()); 11186 InputInfo.SizesArray = 11187 Address(Info.SizesArray, CGM.getPointerAlign()); 11188 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11189 MapTypesArray = Info.MapTypesArray; 11190 MapNamesArray = Info.MapNamesArray; 11191 if (RequiresOuterTask) 11192 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11193 else 11194 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11195 }; 11196 11197 
if (IfCond) { 11198 emitIfClause(CGF, IfCond, TargetThenGen, 11199 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11200 } else { 11201 RegionCodeGenTy ThenRCG(TargetThenGen); 11202 ThenRCG(CGF); 11203 } 11204 } 11205 11206 namespace { 11207 /// Kind of parameter in a function with 'declare simd' directive. 11208 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11209 /// Attribute set of the parameter. 11210 struct ParamAttrTy { 11211 ParamKindTy Kind = Vector; 11212 llvm::APSInt StrideOrArg; 11213 llvm::APSInt Alignment; 11214 }; 11215 } // namespace 11216 11217 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11218 ArrayRef<ParamAttrTy> ParamAttrs) { 11219 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11220 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11221 // of that clause. The VLEN value must be power of 2. 11222 // In other case the notion of the function`s "characteristic data type" (CDT) 11223 // is used to compute the vector length. 11224 // CDT is defined in the following order: 11225 // a) For non-void function, the CDT is the return type. 11226 // b) If the function has any non-uniform, non-linear parameters, then the 11227 // CDT is the type of the first such parameter. 11228 // c) If the CDT determined by a) or b) above is struct, union, or class 11229 // type which is pass-by-value (except for the type that maps to the 11230 // built-in complex data type), the characteristic data type is int. 11231 // d) If none of the above three cases is applicable, the CDT is int. 11232 // The VLEN is then determined based on the CDT and the size of vector 11233 // register of that ISA for which current vector version is generated. The 11234 // VLEN is computed using the formula below: 11235 // VLEN = sizeof(vector_register) / sizeof(CDT), 11236 // where vector register size specified in section 3.2.1 Registers and the 11237 // Stack Frame of original AMD64 ABI document. 
11238 QualType RetType = FD->getReturnType(); 11239 if (RetType.isNull()) 11240 return 0; 11241 ASTContext &C = FD->getASTContext(); 11242 QualType CDT; 11243 if (!RetType.isNull() && !RetType->isVoidType()) { 11244 CDT = RetType; 11245 } else { 11246 unsigned Offset = 0; 11247 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11248 if (ParamAttrs[Offset].Kind == Vector) 11249 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11250 ++Offset; 11251 } 11252 if (CDT.isNull()) { 11253 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11254 if (ParamAttrs[I + Offset].Kind == Vector) { 11255 CDT = FD->getParamDecl(I)->getType(); 11256 break; 11257 } 11258 } 11259 } 11260 } 11261 if (CDT.isNull()) 11262 CDT = C.IntTy; 11263 CDT = CDT->getCanonicalTypeUnqualified(); 11264 if (CDT->isRecordType() || CDT->isUnionType()) 11265 CDT = C.IntTy; 11266 return C.getTypeSize(CDT); 11267 } 11268 11269 static void 11270 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11271 const llvm::APSInt &VLENVal, 11272 ArrayRef<ParamAttrTy> ParamAttrs, 11273 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11274 struct ISADataTy { 11275 char ISA; 11276 unsigned VecRegSize; 11277 }; 11278 ISADataTy ISAData[] = { 11279 { 11280 'b', 128 11281 }, // SSE 11282 { 11283 'c', 256 11284 }, // AVX 11285 { 11286 'd', 256 11287 }, // AVX2 11288 { 11289 'e', 512 11290 }, // AVX512 11291 }; 11292 llvm::SmallVector<char, 2> Masked; 11293 switch (State) { 11294 case OMPDeclareSimdDeclAttr::BS_Undefined: 11295 Masked.push_back('N'); 11296 Masked.push_back('M'); 11297 break; 11298 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11299 Masked.push_back('N'); 11300 break; 11301 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11302 Masked.push_back('M'); 11303 break; 11304 } 11305 for (char Mask : Masked) { 11306 for (const ISADataTy &Data : ISAData) { 11307 SmallString<256> Buffer; 11308 llvm::raw_svector_ostream Out(Buffer); 11309 Out << "_ZGV" << Data.ISA << Mask; 11310 if (!VLENVal) 
{ 11311 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11312 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11313 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11314 } else { 11315 Out << VLENVal; 11316 } 11317 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11318 switch (ParamAttr.Kind){ 11319 case LinearWithVarStride: 11320 Out << 's' << ParamAttr.StrideOrArg; 11321 break; 11322 case Linear: 11323 Out << 'l'; 11324 if (ParamAttr.StrideOrArg != 1) 11325 Out << ParamAttr.StrideOrArg; 11326 break; 11327 case Uniform: 11328 Out << 'u'; 11329 break; 11330 case Vector: 11331 Out << 'v'; 11332 break; 11333 } 11334 if (!!ParamAttr.Alignment) 11335 Out << 'a' << ParamAttr.Alignment; 11336 } 11337 Out << '_' << Fn->getName(); 11338 Fn->addFnAttr(Out.str()); 11339 } 11340 } 11341 } 11342 11343 // This are the Functions that are needed to mangle the name of the 11344 // vector functions generated by the compiler, according to the rules 11345 // defined in the "Vector Function ABI specifications for AArch64", 11346 // available at 11347 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11348 11349 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11350 /// 11351 /// TODO: Need to implement the behavior for reference marked with a 11352 /// var or no linear modifiers (1.b in the section). For this, we 11353 /// need to extend ParamKindTy to support the linear modifiers. 
11354 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11355 QT = QT.getCanonicalType(); 11356 11357 if (QT->isVoidType()) 11358 return false; 11359 11360 if (Kind == ParamKindTy::Uniform) 11361 return false; 11362 11363 if (Kind == ParamKindTy::Linear) 11364 return false; 11365 11366 // TODO: Handle linear references with modifiers 11367 11368 if (Kind == ParamKindTy::LinearWithVarStride) 11369 return false; 11370 11371 return true; 11372 } 11373 11374 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11375 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11376 QT = QT.getCanonicalType(); 11377 unsigned Size = C.getTypeSize(QT); 11378 11379 // Only scalars and complex within 16 bytes wide set PVB to true. 11380 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11381 return false; 11382 11383 if (QT->isFloatingType()) 11384 return true; 11385 11386 if (QT->isIntegerType()) 11387 return true; 11388 11389 if (QT->isPointerType()) 11390 return true; 11391 11392 // TODO: Add support for complex types (section 3.1.2, item 2). 11393 11394 return false; 11395 } 11396 11397 /// Computes the lane size (LS) of a return type or of an input parameter, 11398 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11399 /// TODO: Add support for references, section 3.2.1, item 1. 11400 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11401 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11402 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11403 if (getAArch64PBV(PTy, C)) 11404 return C.getTypeSize(PTy); 11405 } 11406 if (getAArch64PBV(QT, C)) 11407 return C.getTypeSize(QT); 11408 11409 return C.getTypeSize(C.getUIntPtrType()); 11410 } 11411 11412 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11413 // signature of the scalar function, as defined in 3.2.2 of the 11414 // AAVFABI. 
11415 static std::tuple<unsigned, unsigned, bool> 11416 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11417 QualType RetType = FD->getReturnType().getCanonicalType(); 11418 11419 ASTContext &C = FD->getASTContext(); 11420 11421 bool OutputBecomesInput = false; 11422 11423 llvm::SmallVector<unsigned, 8> Sizes; 11424 if (!RetType->isVoidType()) { 11425 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11426 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11427 OutputBecomesInput = true; 11428 } 11429 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11430 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11431 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11432 } 11433 11434 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11435 // The LS of a function parameter / return value can only be a power 11436 // of 2, starting from 8 bits, up to 128. 11437 assert(std::all_of(Sizes.begin(), Sizes.end(), 11438 [](unsigned Size) { 11439 return Size == 8 || Size == 16 || Size == 32 || 11440 Size == 64 || Size == 128; 11441 }) && 11442 "Invalid size"); 11443 11444 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11445 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11446 OutputBecomesInput); 11447 } 11448 11449 /// Mangle the parameter part of the vector function name according to 11450 /// their OpenMP classification. The mangling function is defined in 11451 /// section 3.5 of the AAVFABI. 11452 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11453 SmallString<256> Buffer; 11454 llvm::raw_svector_ostream Out(Buffer); 11455 for (const auto &ParamAttr : ParamAttrs) { 11456 switch (ParamAttr.Kind) { 11457 case LinearWithVarStride: 11458 Out << "ls" << ParamAttr.StrideOrArg; 11459 break; 11460 case Linear: 11461 Out << 'l'; 11462 // Don't print the step value if it is not present or if it is 11463 // equal to 1. 
11464 if (ParamAttr.StrideOrArg != 1) 11465 Out << ParamAttr.StrideOrArg; 11466 break; 11467 case Uniform: 11468 Out << 'u'; 11469 break; 11470 case Vector: 11471 Out << 'v'; 11472 break; 11473 } 11474 11475 if (!!ParamAttr.Alignment) 11476 Out << 'a' << ParamAttr.Alignment; 11477 } 11478 11479 return std::string(Out.str()); 11480 } 11481 11482 // Function used to add the attribute. The parameter `VLEN` is 11483 // templated to allow the use of "x" when targeting scalable functions 11484 // for SVE. 11485 template <typename T> 11486 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11487 char ISA, StringRef ParSeq, 11488 StringRef MangledName, bool OutputBecomesInput, 11489 llvm::Function *Fn) { 11490 SmallString<256> Buffer; 11491 llvm::raw_svector_ostream Out(Buffer); 11492 Out << Prefix << ISA << LMask << VLEN; 11493 if (OutputBecomesInput) 11494 Out << "v"; 11495 Out << ParSeq << "_" << MangledName; 11496 Fn->addFnAttr(Out.str()); 11497 } 11498 11499 // Helper function to generate the Advanced SIMD names depending on 11500 // the value of the NDS when simdlen is not present. 
11501 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11502 StringRef Prefix, char ISA, 11503 StringRef ParSeq, StringRef MangledName, 11504 bool OutputBecomesInput, 11505 llvm::Function *Fn) { 11506 switch (NDS) { 11507 case 8: 11508 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11509 OutputBecomesInput, Fn); 11510 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11511 OutputBecomesInput, Fn); 11512 break; 11513 case 16: 11514 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11515 OutputBecomesInput, Fn); 11516 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11517 OutputBecomesInput, Fn); 11518 break; 11519 case 32: 11520 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11521 OutputBecomesInput, Fn); 11522 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11523 OutputBecomesInput, Fn); 11524 break; 11525 case 64: 11526 case 128: 11527 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11528 OutputBecomesInput, Fn); 11529 break; 11530 default: 11531 llvm_unreachable("Scalar type is too wide."); 11532 } 11533 } 11534 11535 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11536 static void emitAArch64DeclareSimdFunction( 11537 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11538 ArrayRef<ParamAttrTy> ParamAttrs, 11539 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11540 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11541 11542 // Get basic data for building the vector signature. 11543 const auto Data = getNDSWDS(FD, ParamAttrs); 11544 const unsigned NDS = std::get<0>(Data); 11545 const unsigned WDS = std::get<1>(Data); 11546 const bool OutputBecomesInput = std::get<2>(Data); 11547 11548 // Check the values provided via `simdlen` by the user. 11549 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11550 if (UserVLEN == 1) { 11551 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11552 DiagnosticsEngine::Warning, 11553 "The clause simdlen(1) has no effect when targeting aarch64."); 11554 CGM.getDiags().Report(SLoc, DiagID); 11555 return; 11556 } 11557 11558 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11559 // Advanced SIMD output. 11560 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11561 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11562 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11563 "power of 2 when targeting Advanced SIMD."); 11564 CGM.getDiags().Report(SLoc, DiagID); 11565 return; 11566 } 11567 11568 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11569 // limits. 11570 if (ISA == 's' && UserVLEN != 0) { 11571 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11572 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11573 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11574 "lanes in the architectural constraints " 11575 "for SVE (min is 128-bit, max is " 11576 "2048-bit, by steps of 128-bit)"); 11577 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11578 return; 11579 } 11580 } 11581 11582 // Sort out parameter sequence. 11583 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11584 StringRef Prefix = "_ZGV"; 11585 // Generate simdlen from user input (if any). 11586 if (UserVLEN) { 11587 if (ISA == 's') { 11588 // SVE generates only a masked function. 11589 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11590 OutputBecomesInput, Fn); 11591 } else { 11592 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11593 // Advanced SIMD generates one or two functions, depending on 11594 // the `[not]inbranch` clause. 
11595 switch (State) { 11596 case OMPDeclareSimdDeclAttr::BS_Undefined: 11597 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11598 OutputBecomesInput, Fn); 11599 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11600 OutputBecomesInput, Fn); 11601 break; 11602 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11603 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11604 OutputBecomesInput, Fn); 11605 break; 11606 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11607 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11608 OutputBecomesInput, Fn); 11609 break; 11610 } 11611 } 11612 } else { 11613 // If no user simdlen is provided, follow the AAVFABI rules for 11614 // generating the vector length. 11615 if (ISA == 's') { 11616 // SVE, section 3.4.1, item 1. 11617 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11618 OutputBecomesInput, Fn); 11619 } else { 11620 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11621 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11622 // two vector names depending on the use of the clause 11623 // `[not]inbranch`. 
11624 switch (State) { 11625 case OMPDeclareSimdDeclAttr::BS_Undefined: 11626 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11627 OutputBecomesInput, Fn); 11628 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11629 OutputBecomesInput, Fn); 11630 break; 11631 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11632 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11633 OutputBecomesInput, Fn); 11634 break; 11635 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11636 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11637 OutputBecomesInput, Fn); 11638 break; 11639 } 11640 } 11641 } 11642 } 11643 11644 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11645 llvm::Function *Fn) { 11646 ASTContext &C = CGM.getContext(); 11647 FD = FD->getMostRecentDecl(); 11648 // Map params to their positions in function decl. 11649 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11650 if (isa<CXXMethodDecl>(FD)) 11651 ParamPositions.try_emplace(FD, 0); 11652 unsigned ParamPos = ParamPositions.size(); 11653 for (const ParmVarDecl *P : FD->parameters()) { 11654 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11655 ++ParamPos; 11656 } 11657 while (FD) { 11658 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11659 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11660 // Mark uniform parameters. 11661 for (const Expr *E : Attr->uniforms()) { 11662 E = E->IgnoreParenImpCasts(); 11663 unsigned Pos; 11664 if (isa<CXXThisExpr>(E)) { 11665 Pos = ParamPositions[FD]; 11666 } else { 11667 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11668 ->getCanonicalDecl(); 11669 Pos = ParamPositions[PVD]; 11670 } 11671 ParamAttrs[Pos].Kind = Uniform; 11672 } 11673 // Get alignment info. 
11674 auto NI = Attr->alignments_begin(); 11675 for (const Expr *E : Attr->aligneds()) { 11676 E = E->IgnoreParenImpCasts(); 11677 unsigned Pos; 11678 QualType ParmTy; 11679 if (isa<CXXThisExpr>(E)) { 11680 Pos = ParamPositions[FD]; 11681 ParmTy = E->getType(); 11682 } else { 11683 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11684 ->getCanonicalDecl(); 11685 Pos = ParamPositions[PVD]; 11686 ParmTy = PVD->getType(); 11687 } 11688 ParamAttrs[Pos].Alignment = 11689 (*NI) 11690 ? (*NI)->EvaluateKnownConstInt(C) 11691 : llvm::APSInt::getUnsigned( 11692 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11693 .getQuantity()); 11694 ++NI; 11695 } 11696 // Mark linear parameters. 11697 auto SI = Attr->steps_begin(); 11698 auto MI = Attr->modifiers_begin(); 11699 for (const Expr *E : Attr->linears()) { 11700 E = E->IgnoreParenImpCasts(); 11701 unsigned Pos; 11702 // Rescaling factor needed to compute the linear parameter 11703 // value in the mangled name. 11704 unsigned PtrRescalingFactor = 1; 11705 if (isa<CXXThisExpr>(E)) { 11706 Pos = ParamPositions[FD]; 11707 } else { 11708 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11709 ->getCanonicalDecl(); 11710 Pos = ParamPositions[PVD]; 11711 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11712 PtrRescalingFactor = CGM.getContext() 11713 .getTypeSizeInChars(P->getPointeeType()) 11714 .getQuantity(); 11715 } 11716 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11717 ParamAttr.Kind = Linear; 11718 // Assuming a stride of 1, for `linear` without modifiers. 
11719 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11720 if (*SI) { 11721 Expr::EvalResult Result; 11722 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11723 if (const auto *DRE = 11724 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11725 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11726 ParamAttr.Kind = LinearWithVarStride; 11727 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11728 ParamPositions[StridePVD->getCanonicalDecl()]); 11729 } 11730 } 11731 } else { 11732 ParamAttr.StrideOrArg = Result.Val.getInt(); 11733 } 11734 } 11735 // If we are using a linear clause on a pointer, we need to 11736 // rescale the value of linear_step with the byte size of the 11737 // pointee type. 11738 if (Linear == ParamAttr.Kind) 11739 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11740 ++SI; 11741 ++MI; 11742 } 11743 llvm::APSInt VLENVal; 11744 SourceLocation ExprLoc; 11745 const Expr *VLENExpr = Attr->getSimdlen(); 11746 if (VLENExpr) { 11747 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11748 ExprLoc = VLENExpr->getExprLoc(); 11749 } 11750 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11751 if (CGM.getTriple().isX86()) { 11752 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11753 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11754 unsigned VLEN = VLENVal.getExtValue(); 11755 StringRef MangledName = Fn->getName(); 11756 if (CGM.getTarget().hasFeature("sve")) 11757 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11758 MangledName, 's', 128, Fn, ExprLoc); 11759 if (CGM.getTarget().hasFeature("neon")) 11760 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11761 MangledName, 'n', 128, Fn, ExprLoc); 11762 } 11763 } 11764 FD = FD->getPreviousDecl(); 11765 } 11766 } 11767 11768 namespace { 11769 /// Cleanup action for doacross support. 
11770 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11771 public: 11772 static const int DoacrossFinArgs = 2; 11773 11774 private: 11775 llvm::FunctionCallee RTLFn; 11776 llvm::Value *Args[DoacrossFinArgs]; 11777 11778 public: 11779 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11780 ArrayRef<llvm::Value *> CallArgs) 11781 : RTLFn(RTLFn) { 11782 assert(CallArgs.size() == DoacrossFinArgs); 11783 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11784 } 11785 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11786 if (!CGF.HaveInsertPoint()) 11787 return; 11788 CGF.EmitRuntimeCall(RTLFn, Args); 11789 } 11790 }; 11791 } // namespace 11792 11793 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11794 const OMPLoopDirective &D, 11795 ArrayRef<Expr *> NumIterations) { 11796 if (!CGF.HaveInsertPoint()) 11797 return; 11798 11799 ASTContext &C = CGM.getContext(); 11800 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11801 RecordDecl *RD; 11802 if (KmpDimTy.isNull()) { 11803 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11804 // kmp_int64 lo; // lower 11805 // kmp_int64 up; // upper 11806 // kmp_int64 st; // stride 11807 // }; 11808 RD = C.buildImplicitRecord("kmp_dim"); 11809 RD->startDefinition(); 11810 addFieldToRecordDecl(C, RD, Int64Ty); 11811 addFieldToRecordDecl(C, RD, Int64Ty); 11812 addFieldToRecordDecl(C, RD, Int64Ty); 11813 RD->completeDefinition(); 11814 KmpDimTy = C.getRecordType(RD); 11815 } else { 11816 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11817 } 11818 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11819 QualType ArrayTy = 11820 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11821 11822 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11823 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11824 enum { LowerFD = 0, UpperFD, StrideFD }; 11825 // Fill dims with data. 
11826 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11827 LValue DimsLVal = CGF.MakeAddrLValue( 11828 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11829 // dims.upper = num_iterations; 11830 LValue UpperLVal = CGF.EmitLValueForField( 11831 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11832 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11833 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11834 Int64Ty, NumIterations[I]->getExprLoc()); 11835 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11836 // dims.stride = 1; 11837 LValue StrideLVal = CGF.EmitLValueForField( 11838 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11839 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11840 StrideLVal); 11841 } 11842 11843 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11844 // kmp_int32 num_dims, struct kmp_dim * dims); 11845 llvm::Value *Args[] = { 11846 emitUpdateLocation(CGF, D.getBeginLoc()), 11847 getThreadID(CGF, D.getBeginLoc()), 11848 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11849 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11850 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11851 CGM.VoidPtrTy)}; 11852 11853 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11854 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11855 CGF.EmitRuntimeCall(RTLFn, Args); 11856 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11857 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11858 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11859 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11860 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11861 llvm::makeArrayRef(FiniArgs)); 11862 } 11863 11864 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11865 const OMPDependClause *C) { 11866 QualType Int64Ty = 11867 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11868 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11869 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11870 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11871 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11872 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11873 const Expr *CounterVal = C->getLoopData(I); 11874 assert(CounterVal); 11875 llvm::Value *CntVal = CGF.EmitScalarConversion( 11876 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11877 CounterVal->getExprLoc()); 11878 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11879 /*Volatile=*/false, Int64Ty); 11880 } 11881 llvm::Value *Args[] = { 11882 emitUpdateLocation(CGF, C->getBeginLoc()), 11883 getThreadID(CGF, C->getBeginLoc()), 11884 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11885 llvm::FunctionCallee RTLFn; 11886 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11887 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11888 OMPRTL___kmpc_doacross_post); 11889 } else { 11890 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11891 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11892 OMPRTL___kmpc_doacross_wait); 11893 } 11894 CGF.EmitRuntimeCall(RTLFn, Args); 11895 } 11896 11897 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11898 llvm::FunctionCallee Callee, 11899 ArrayRef<llvm::Value *> Args) const { 11900 assert(Loc.isValid() && "Outlined function call location must be valid."); 11901 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11902 11903 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11904 if (Fn->doesNotThrow()) { 11905 CGF.EmitNounwindRuntimeCall(Fn, Args); 11906 return; 11907 } 11908 } 11909 CGF.EmitRuntimeCall(Callee, Args); 11910 } 11911 11912 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11913 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11914 ArrayRef<llvm::Value *> Args) const { 11915 emitCall(CGF, Loc, OutlinedFn, Args); 11916 } 11917 11918 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11919 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11920 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11921 HasEmittedDeclareTargetRegion = true; 11922 } 11923 11924 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11925 const VarDecl *NativeParam, 11926 const VarDecl *TargetParam) const { 11927 return CGF.GetAddrOfLocalVar(NativeParam); 11928 } 11929 11930 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11931 const VarDecl *VD) { 11932 if (!VD) 11933 return Address::invalid(); 11934 Address UntiedAddr = Address::invalid(); 11935 Address UntiedRealAddr = Address::invalid(); 11936 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11937 if (It != FunctionToUntiedTaskStackMap.end()) { 11938 const UntiedLocalVarsAddressesMap &UntiedData = 11939 UntiedLocalVarsStack[It->second]; 11940 auto I = UntiedData.find(VD); 11941 if (I != UntiedData.end()) { 11942 UntiedAddr = I->second.first; 11943 UntiedRealAddr = I->second.second; 11944 } 11945 } 11946 const VarDecl *CVD = VD->getCanonicalDecl(); 11947 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11948 // Use the default allocation. 
11949 if (!isAllocatableDecl(VD)) 11950 return UntiedAddr; 11951 llvm::Value *Size; 11952 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11953 if (CVD->getType()->isVariablyModifiedType()) { 11954 Size = CGF.getTypeSize(CVD->getType()); 11955 // Align the size: ((size + align - 1) / align) * align 11956 Size = CGF.Builder.CreateNUWAdd( 11957 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11958 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11959 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11960 } else { 11961 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11962 Size = CGM.getSize(Sz.alignTo(Align)); 11963 } 11964 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11965 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11966 assert(AA->getAllocator() && 11967 "Expected allocator expression for non-default allocator."); 11968 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11969 // According to the standard, the original allocator type is a enum 11970 // (integer). Convert to pointer type, if required. 
11971 Allocator = CGF.EmitScalarConversion( 11972 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 11973 AA->getAllocator()->getExprLoc()); 11974 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11975 11976 llvm::Value *Addr = 11977 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11978 CGM.getModule(), OMPRTL___kmpc_alloc), 11979 Args, getName({CVD->getName(), ".void.addr"})); 11980 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11981 CGM.getModule(), OMPRTL___kmpc_free); 11982 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11983 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11984 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11985 if (UntiedAddr.isValid()) 11986 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11987 11988 // Cleanup action for allocate support. 11989 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11990 llvm::FunctionCallee RTLFn; 11991 unsigned LocEncoding; 11992 Address Addr; 11993 const Expr *Allocator; 11994 11995 public: 11996 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, 11997 Address Addr, const Expr *Allocator) 11998 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11999 Allocator(Allocator) {} 12000 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12001 if (!CGF.HaveInsertPoint()) 12002 return; 12003 llvm::Value *Args[3]; 12004 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12005 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12006 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12007 Addr.getPointer(), CGF.VoidPtrTy); 12008 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 12009 // According to the standard, the original allocator type is a enum 12010 // (integer). Convert to pointer type, if required. 
12011 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12012 CGF.getContext().VoidPtrTy, 12013 Allocator->getExprLoc()); 12014 Args[2] = AllocVal; 12015 12016 CGF.EmitRuntimeCall(RTLFn, Args); 12017 } 12018 }; 12019 Address VDAddr = 12020 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 12021 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12022 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12023 VDAddr, AA->getAllocator()); 12024 if (UntiedRealAddr.isValid()) 12025 if (auto *Region = 12026 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12027 Region->emitUntiedSwitch(CGF); 12028 return VDAddr; 12029 } 12030 return UntiedAddr; 12031 } 12032 12033 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12034 const VarDecl *VD) const { 12035 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12036 if (It == FunctionToUntiedTaskStackMap.end()) 12037 return false; 12038 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12039 } 12040 12041 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12042 CodeGenModule &CGM, const OMPLoopDirective &S) 12043 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12044 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12045 if (!NeedToPush) 12046 return; 12047 NontemporalDeclsSet &DS = 12048 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12049 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12050 for (const Stmt *Ref : C->private_refs()) { 12051 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12052 const ValueDecl *VD; 12053 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12054 VD = DRE->getDecl(); 12055 } else { 12056 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12057 assert((ME->isImplicitCXXThis() || 12058 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12059 "Expected member of current class."); 12060 VD = 
ME->getMemberDecl(); 12061 } 12062 DS.insert(VD); 12063 } 12064 } 12065 } 12066 12067 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12068 if (!NeedToPush) 12069 return; 12070 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12071 } 12072 12073 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12074 CodeGenFunction &CGF, 12075 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 12076 std::pair<Address, Address>> &LocalVars) 12077 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12078 if (!NeedToPush) 12079 return; 12080 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12081 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12082 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12083 } 12084 12085 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12086 if (!NeedToPush) 12087 return; 12088 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12089 } 12090 12091 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12092 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12093 12094 return llvm::any_of( 12095 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12096 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12097 } 12098 12099 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12100 const OMPExecutableDirective &S, 12101 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12102 const { 12103 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12104 // Vars in target/task regions must be excluded completely. 
12105 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12106 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12107 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12108 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12109 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12110 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12111 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12112 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12113 } 12114 } 12115 // Exclude vars in private clauses. 12116 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12117 for (const Expr *Ref : C->varlists()) { 12118 if (!Ref->getType()->isScalarType()) 12119 continue; 12120 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12121 if (!DRE) 12122 continue; 12123 NeedToCheckForLPCs.insert(DRE->getDecl()); 12124 } 12125 } 12126 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12127 for (const Expr *Ref : C->varlists()) { 12128 if (!Ref->getType()->isScalarType()) 12129 continue; 12130 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12131 if (!DRE) 12132 continue; 12133 NeedToCheckForLPCs.insert(DRE->getDecl()); 12134 } 12135 } 12136 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12137 for (const Expr *Ref : C->varlists()) { 12138 if (!Ref->getType()->isScalarType()) 12139 continue; 12140 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12141 if (!DRE) 12142 continue; 12143 NeedToCheckForLPCs.insert(DRE->getDecl()); 12144 } 12145 } 12146 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12147 for (const Expr *Ref : C->varlists()) { 12148 if (!Ref->getType()->isScalarType()) 12149 continue; 12150 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12151 if (!DRE) 12152 continue; 12153 NeedToCheckForLPCs.insert(DRE->getDecl()); 12154 } 12155 } 12156 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12157 for (const Expr *Ref : C->varlists()) { 12158 if (!Ref->getType()->isScalarType()) 12159 continue; 12160 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12161 if (!DRE) 12162 continue; 12163 NeedToCheckForLPCs.insert(DRE->getDecl()); 12164 } 12165 } 12166 for (const Decl *VD : NeedToCheckForLPCs) { 12167 for (const LastprivateConditionalData &Data : 12168 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12169 if (Data.DeclToUniqueName.count(VD) > 0) { 12170 if (!Data.Disabled) 12171 NeedToAddForLPCsAsDisabled.insert(VD); 12172 break; 12173 } 12174 } 12175 } 12176 } 12177 12178 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12179 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12180 : CGM(CGF.CGM), 12181 Action((CGM.getLangOpts().OpenMP >= 50 && 12182 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12183 [](const OMPLastprivateClause *C) { 12184 return C->getKind() == 12185 OMPC_LASTPRIVATE_conditional; 12186 })) 12187 ? 
ActionToDo::PushAsLastprivateConditional 12188 : ActionToDo::DoNotPush) { 12189 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12190 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12191 return; 12192 assert(Action == ActionToDo::PushAsLastprivateConditional && 12193 "Expected a push action."); 12194 LastprivateConditionalData &Data = 12195 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12196 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12197 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12198 continue; 12199 12200 for (const Expr *Ref : C->varlists()) { 12201 Data.DeclToUniqueName.insert(std::make_pair( 12202 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12203 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12204 } 12205 } 12206 Data.IVLVal = IVLVal; 12207 Data.Fn = CGF.CurFn; 12208 } 12209 12210 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12211 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12212 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12213 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12214 if (CGM.getLangOpts().OpenMP < 50) 12215 return; 12216 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12217 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12218 if (!NeedToAddForLPCsAsDisabled.empty()) { 12219 Action = ActionToDo::DisableLastprivateConditional; 12220 LastprivateConditionalData &Data = 12221 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12222 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12223 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12224 Data.Fn = CGF.CurFn; 12225 Data.Disabled = true; 12226 } 12227 } 12228 12229 CGOpenMPRuntime::LastprivateConditionalRAII 12230 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12231 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12232 return 
LastprivateConditionalRAII(CGF, S); 12233 } 12234 12235 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12236 if (CGM.getLangOpts().OpenMP < 50) 12237 return; 12238 if (Action == ActionToDo::DisableLastprivateConditional) { 12239 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12240 "Expected list of disabled private vars."); 12241 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12242 } 12243 if (Action == ActionToDo::PushAsLastprivateConditional) { 12244 assert( 12245 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12246 "Expected list of lastprivate conditional vars."); 12247 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12248 } 12249 } 12250 12251 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12252 const VarDecl *VD) { 12253 ASTContext &C = CGM.getContext(); 12254 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12255 if (I == LastprivateConditionalToTypes.end()) 12256 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12257 QualType NewType; 12258 const FieldDecl *VDField; 12259 const FieldDecl *FiredField; 12260 LValue BaseLVal; 12261 auto VI = I->getSecond().find(VD); 12262 if (VI == I->getSecond().end()) { 12263 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12264 RD->startDefinition(); 12265 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12266 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12267 RD->completeDefinition(); 12268 NewType = C.getRecordType(RD); 12269 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12270 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12271 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12272 } else { 12273 NewType = std::get<0>(VI->getSecond()); 12274 VDField = std::get<1>(VI->getSecond()); 12275 FiredField = std::get<2>(VI->getSecond()); 
12276 BaseLVal = std::get<3>(VI->getSecond()); 12277 } 12278 LValue FiredLVal = 12279 CGF.EmitLValueForField(BaseLVal, FiredField); 12280 CGF.EmitStoreOfScalar( 12281 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12282 FiredLVal); 12283 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12284 } 12285 12286 namespace { 12287 /// Checks if the lastprivate conditional variable is referenced in LHS. 12288 class LastprivateConditionalRefChecker final 12289 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12290 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12291 const Expr *FoundE = nullptr; 12292 const Decl *FoundD = nullptr; 12293 StringRef UniqueDeclName; 12294 LValue IVLVal; 12295 llvm::Function *FoundFn = nullptr; 12296 SourceLocation Loc; 12297 12298 public: 12299 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12300 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12301 llvm::reverse(LPM)) { 12302 auto It = D.DeclToUniqueName.find(E->getDecl()); 12303 if (It == D.DeclToUniqueName.end()) 12304 continue; 12305 if (D.Disabled) 12306 return false; 12307 FoundE = E; 12308 FoundD = E->getDecl()->getCanonicalDecl(); 12309 UniqueDeclName = It->second; 12310 IVLVal = D.IVLVal; 12311 FoundFn = D.Fn; 12312 break; 12313 } 12314 return FoundE == E; 12315 } 12316 bool VisitMemberExpr(const MemberExpr *E) { 12317 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12318 return false; 12319 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12320 llvm::reverse(LPM)) { 12321 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12322 if (It == D.DeclToUniqueName.end()) 12323 continue; 12324 if (D.Disabled) 12325 return false; 12326 FoundE = E; 12327 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12328 UniqueDeclName = It->second; 12329 IVLVal = D.IVLVal; 12330 FoundFn = D.Fn; 12331 break; 12332 } 12333 return FoundE == E; 12334 } 12335 bool VisitStmt(const Stmt *S) { 12336 for (const Stmt 
*Child : S->children()) { 12337 if (!Child) 12338 continue; 12339 if (const auto *E = dyn_cast<Expr>(Child)) 12340 if (!E->isGLValue()) 12341 continue; 12342 if (Visit(Child)) 12343 return true; 12344 } 12345 return false; 12346 } 12347 explicit LastprivateConditionalRefChecker( 12348 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12349 : LPM(LPM) {} 12350 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12351 getFoundData() const { 12352 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12353 } 12354 }; 12355 } // namespace 12356 12357 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12358 LValue IVLVal, 12359 StringRef UniqueDeclName, 12360 LValue LVal, 12361 SourceLocation Loc) { 12362 // Last updated loop counter for the lastprivate conditional var. 12363 // int<xx> last_iv = 0; 12364 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12365 llvm::Constant *LastIV = 12366 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12367 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12368 IVLVal.getAlignment().getAsAlign()); 12369 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12370 12371 // Last value of the lastprivate conditional. 12372 // decltype(priv_a) last_a; 12373 llvm::Constant *Last = getOrCreateInternalVariable( 12374 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12375 cast<llvm::GlobalVariable>(Last)->setAlignment( 12376 LVal.getAlignment().getAsAlign()); 12377 LValue LastLVal = 12378 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12379 12380 // Global loop counter. Required to handle inner parallel-for regions. 
12381 // iv 12382 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12383 12384 // #pragma omp critical(a) 12385 // if (last_iv <= iv) { 12386 // last_iv = iv; 12387 // last_a = priv_a; 12388 // } 12389 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12390 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12391 Action.Enter(CGF); 12392 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12393 // (last_iv <= iv) ? Check if the variable is updated and store new 12394 // value in global var. 12395 llvm::Value *CmpRes; 12396 if (IVLVal.getType()->isSignedIntegerType()) { 12397 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12398 } else { 12399 assert(IVLVal.getType()->isUnsignedIntegerType() && 12400 "Loop iteration variable must be integer."); 12401 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12402 } 12403 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12404 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12405 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12406 // { 12407 CGF.EmitBlock(ThenBB); 12408 12409 // last_iv = iv; 12410 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12411 12412 // last_a = priv_a; 12413 switch (CGF.getEvaluationKind(LVal.getType())) { 12414 case TEK_Scalar: { 12415 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12416 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12417 break; 12418 } 12419 case TEK_Complex: { 12420 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12421 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12422 break; 12423 } 12424 case TEK_Aggregate: 12425 llvm_unreachable( 12426 "Aggregates are not supported in lastprivate conditional."); 12427 } 12428 // } 12429 CGF.EmitBranch(ExitBB); 12430 // There is no need to emit line number for unconditional branch. 
12431 (void)ApplyDebugLocation::CreateEmpty(CGF); 12432 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12433 }; 12434 12435 if (CGM.getLangOpts().OpenMPSimd) { 12436 // Do not emit as a critical region as no parallel region could be emitted. 12437 RegionCodeGenTy ThenRCG(CodeGen); 12438 ThenRCG(CGF); 12439 } else { 12440 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12441 } 12442 } 12443 12444 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12445 const Expr *LHS) { 12446 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12447 return; 12448 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12449 if (!Checker.Visit(LHS)) 12450 return; 12451 const Expr *FoundE; 12452 const Decl *FoundD; 12453 StringRef UniqueDeclName; 12454 LValue IVLVal; 12455 llvm::Function *FoundFn; 12456 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12457 Checker.getFoundData(); 12458 if (FoundFn != CGF.CurFn) { 12459 // Special codegen for inner parallel regions. 
12460 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12461 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12462 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12463 "Lastprivate conditional is not found in outer region."); 12464 QualType StructTy = std::get<0>(It->getSecond()); 12465 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12466 LValue PrivLVal = CGF.EmitLValue(FoundE); 12467 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12468 PrivLVal.getAddress(CGF), 12469 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12470 LValue BaseLVal = 12471 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12472 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12473 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12474 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12475 FiredLVal, llvm::AtomicOrdering::Unordered, 12476 /*IsVolatile=*/true, /*isInit=*/false); 12477 return; 12478 } 12479 12480 // Private address of the lastprivate conditional in the current context. 
12481 // priv_a 12482 LValue LVal = CGF.EmitLValue(FoundE); 12483 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12484 FoundE->getExprLoc()); 12485 } 12486 12487 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12488 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12489 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12490 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12491 return; 12492 auto Range = llvm::reverse(LastprivateConditionalStack); 12493 auto It = llvm::find_if( 12494 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12495 if (It == Range.end() || It->Fn != CGF.CurFn) 12496 return; 12497 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12498 assert(LPCI != LastprivateConditionalToTypes.end() && 12499 "Lastprivates must be registered already."); 12500 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12501 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12502 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12503 for (const auto &Pair : It->DeclToUniqueName) { 12504 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12505 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12506 continue; 12507 auto I = LPCI->getSecond().find(Pair.first); 12508 assert(I != LPCI->getSecond().end() && 12509 "Lastprivate must be rehistered already."); 12510 // bool Cmp = priv_a.Fired != 0; 12511 LValue BaseLVal = std::get<3>(I->getSecond()); 12512 LValue FiredLVal = 12513 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12514 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12515 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12516 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12517 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12518 // if (Cmp) { 12519 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12520 CGF.EmitBlock(ThenBB); 
12521 Address Addr = CGF.GetAddrOfLocalVar(VD); 12522 LValue LVal; 12523 if (VD->getType()->isReferenceType()) 12524 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12525 AlignmentSource::Decl); 12526 else 12527 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12528 AlignmentSource::Decl); 12529 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12530 D.getBeginLoc()); 12531 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12532 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12533 // } 12534 } 12535 } 12536 12537 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12538 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12539 SourceLocation Loc) { 12540 if (CGF.getLangOpts().OpenMP < 50) 12541 return; 12542 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12543 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12544 "Unknown lastprivate conditional variable."); 12545 StringRef UniqueName = It->second; 12546 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12547 // The variable was not updated in the region - exit. 
12548 if (!GV) 12549 return; 12550 LValue LPLVal = CGF.MakeAddrLValue( 12551 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12552 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12553 CGF.EmitStoreOfScalar(Res, PrivLVal); 12554 } 12555 12556 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12557 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12558 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12559 llvm_unreachable("Not supported in SIMD-only mode"); 12560 } 12561 12562 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12563 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12564 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12565 llvm_unreachable("Not supported in SIMD-only mode"); 12566 } 12567 12568 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12569 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12570 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12571 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12572 bool Tied, unsigned &NumberOfParts) { 12573 llvm_unreachable("Not supported in SIMD-only mode"); 12574 } 12575 12576 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12577 SourceLocation Loc, 12578 llvm::Function *OutlinedFn, 12579 ArrayRef<llvm::Value *> CapturedVars, 12580 const Expr *IfCond) { 12581 llvm_unreachable("Not supported in SIMD-only mode"); 12582 } 12583 12584 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12585 CodeGenFunction &CGF, StringRef CriticalName, 12586 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12587 const Expr *Hint) { 12588 llvm_unreachable("Not supported in SIMD-only mode"); 12589 } 12590 12591 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12592 const RegionCodeGenTy &MasterOpGen, 12593 SourceLocation Loc) { 12594 llvm_unreachable("Not supported in SIMD-only mode"); 12595 } 12596 12597 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12598 SourceLocation Loc) { 12599 llvm_unreachable("Not supported in SIMD-only mode"); 12600 } 12601 12602 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12603 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12604 SourceLocation Loc) { 12605 llvm_unreachable("Not supported in SIMD-only mode"); 12606 } 12607 12608 void CGOpenMPSIMDRuntime::emitSingleRegion( 12609 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12610 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12611 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12612 ArrayRef<const Expr *> AssignmentOps) { 12613 llvm_unreachable("Not supported in SIMD-only mode"); 12614 } 12615 12616 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12617 const RegionCodeGenTy &OrderedOpGen, 12618 SourceLocation Loc, 12619 bool IsThreads) { 12620 llvm_unreachable("Not supported in SIMD-only mode"); 12621 } 12622 12623 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12624 SourceLocation Loc, 12625 OpenMPDirectiveKind Kind, 12626 bool EmitChecks, 12627 bool ForceSimpleCall) { 12628 llvm_unreachable("Not supported in SIMD-only mode"); 12629 } 12630 12631 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12632 CodeGenFunction &CGF, SourceLocation Loc, 12633 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12634 bool Ordered, const DispatchRTInput &DispatchValues) { 12635 llvm_unreachable("Not supported in SIMD-only mode"); 12636 } 12637 12638 void CGOpenMPSIMDRuntime::emitForStaticInit( 12639 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12640 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12641 llvm_unreachable("Not supported in SIMD-only mode"); 12642 } 12643 12644 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12645 CodeGenFunction &CGF, SourceLocation Loc, 12646 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12647 llvm_unreachable("Not supported in SIMD-only mode"); 12648 } 12649 12650 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12651 SourceLocation Loc, 12652 unsigned IVSize, 12653 bool IVSigned) { 12654 llvm_unreachable("Not supported in SIMD-only mode"); 12655 } 12656 12657 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12658 SourceLocation Loc, 12659 OpenMPDirectiveKind DKind) { 12660 llvm_unreachable("Not supported in SIMD-only mode"); 12661 } 12662 12663 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12664 SourceLocation Loc, 12665 unsigned IVSize, bool IVSigned, 12666 Address IL, Address LB, 12667 Address UB, Address ST) { 12668 llvm_unreachable("Not supported in SIMD-only mode"); 12669 } 12670 12671 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12672 llvm::Value *NumThreads, 12673 SourceLocation Loc) { 12674 llvm_unreachable("Not supported in SIMD-only mode"); 12675 } 12676 12677 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12678 ProcBindKind ProcBind, 12679 SourceLocation Loc) { 12680 llvm_unreachable("Not supported in SIMD-only mode"); 12681 } 12682 12683 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12684 const VarDecl *VD, 12685 Address VDAddr, 12686 SourceLocation Loc) { 12687 llvm_unreachable("Not supported in SIMD-only mode"); 12688 } 12689 12690 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12691 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12692 CodeGenFunction *CGF) { 12693 llvm_unreachable("Not supported in SIMD-only mode"); 12694 } 12695 12696 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12697 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12698 llvm_unreachable("Not supported in SIMD-only mode"); 12699 } 12700 12701 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12702 ArrayRef<const Expr *> 
Vars, 12703 SourceLocation Loc, 12704 llvm::AtomicOrdering AO) { 12705 llvm_unreachable("Not supported in SIMD-only mode"); 12706 } 12707 12708 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12709 const OMPExecutableDirective &D, 12710 llvm::Function *TaskFunction, 12711 QualType SharedsTy, Address Shareds, 12712 const Expr *IfCond, 12713 const OMPTaskDataTy &Data) { 12714 llvm_unreachable("Not supported in SIMD-only mode"); 12715 } 12716 12717 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12718 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12719 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12720 const Expr *IfCond, const OMPTaskDataTy &Data) { 12721 llvm_unreachable("Not supported in SIMD-only mode"); 12722 } 12723 12724 void CGOpenMPSIMDRuntime::emitReduction( 12725 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12726 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12727 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12728 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12729 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12730 ReductionOps, Options); 12731 } 12732 12733 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12734 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12735 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12736 llvm_unreachable("Not supported in SIMD-only mode"); 12737 } 12738 12739 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12740 SourceLocation Loc, 12741 bool IsWorksharingReduction) { 12742 llvm_unreachable("Not supported in SIMD-only mode"); 12743 } 12744 12745 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12746 SourceLocation Loc, 12747 ReductionCodeGen &RCG, 12748 unsigned N) { 12749 llvm_unreachable("Not supported in SIMD-only mode"); 12750 } 12751 
12752 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12753 SourceLocation Loc, 12754 llvm::Value *ReductionsPtr, 12755 LValue SharedLVal) { 12756 llvm_unreachable("Not supported in SIMD-only mode"); 12757 } 12758 12759 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12760 SourceLocation Loc) { 12761 llvm_unreachable("Not supported in SIMD-only mode"); 12762 } 12763 12764 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12765 CodeGenFunction &CGF, SourceLocation Loc, 12766 OpenMPDirectiveKind CancelRegion) { 12767 llvm_unreachable("Not supported in SIMD-only mode"); 12768 } 12769 12770 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12771 SourceLocation Loc, const Expr *IfCond, 12772 OpenMPDirectiveKind CancelRegion) { 12773 llvm_unreachable("Not supported in SIMD-only mode"); 12774 } 12775 12776 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12777 const OMPExecutableDirective &D, StringRef ParentName, 12778 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12779 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12780 llvm_unreachable("Not supported in SIMD-only mode"); 12781 } 12782 12783 void CGOpenMPSIMDRuntime::emitTargetCall( 12784 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12785 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12786 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12787 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12788 const OMPLoopDirective &D)> 12789 SizeEmitter) { 12790 llvm_unreachable("Not supported in SIMD-only mode"); 12791 } 12792 12793 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12794 llvm_unreachable("Not supported in SIMD-only mode"); 12795 } 12796 12797 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12798 llvm_unreachable("Not supported in SIMD-only mode"); 12799 } 12800 12801 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12802 return false; 12803 } 12804 12805 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12806 const OMPExecutableDirective &D, 12807 SourceLocation Loc, 12808 llvm::Function *OutlinedFn, 12809 ArrayRef<llvm::Value *> CapturedVars) { 12810 llvm_unreachable("Not supported in SIMD-only mode"); 12811 } 12812 12813 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12814 const Expr *NumTeams, 12815 const Expr *ThreadLimit, 12816 SourceLocation Loc) { 12817 llvm_unreachable("Not supported in SIMD-only mode"); 12818 } 12819 12820 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12821 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12822 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12823 llvm_unreachable("Not supported in SIMD-only mode"); 12824 } 12825 12826 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12827 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12828 const Expr *Device) { 12829 llvm_unreachable("Not supported in SIMD-only mode"); 12830 } 12831 12832 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12833 const OMPLoopDirective &D, 12834 ArrayRef<Expr *> NumIterations) { 12835 llvm_unreachable("Not supported in SIMD-only mode"); 12836 } 12837 12838 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12839 const OMPDependClause *C) { 12840 llvm_unreachable("Not supported in SIMD-only mode"); 12841 } 12842 12843 const VarDecl * 12844 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12845 const VarDecl *NativeParam) const { 12846 llvm_unreachable("Not supported in SIMD-only mode"); 12847 } 12848 12849 Address 12850 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12851 const VarDecl *NativeParam, 12852 const VarDecl *TargetParam) const { 12853 llvm_unreachable("Not supported in SIMD-only mode"); 12854 } 12855