//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that outline the given captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (e.g. inlined regions that reuse the enclosing capture info).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting the task-switching code for untied tasks; a no-op by
  /// default, overridden by task-outlined and inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: all OpenMP region infos use the CR_OpenMP capture kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based switching code required for
  /// untied tasks: each task part gets a switch case, and the generated code
  /// stores the next part id and returns to the runtime between parts.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 is the entry point of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a scheduling point: store the next part id, run the user-provided
    /// untied codegen, return to the runtime, and register the resume block
    /// as a new case of the untied switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The capture info that was active before this inlined region was entered.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same as OldCSI if it is an OpenMP region info, null otherwise.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region.
  StringRef HelperName;
};

/// Placeholder codegen callback; expression-only regions never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only non-local (e.g. global)
      // variables need privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda-capture and block info of the
  /// enclosing function are stashed away for the lifetime of this RAII so the
  /// inlined region does not inherit them.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the post-action's Exit hook when the cleanup fires; skipped if the
  /// builder has no insertion point (code is unreachable).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the Exit hook as a cleanup so it also runs on the EH path.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction. The reduction op for a user-defined reduction
/// is a call whose callee is an OpaqueValueExpr wrapping a DeclRefExpr to the
/// OMPDeclareReductionDecl; returns null if the expression does not have that
/// shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initialization of a private reduction item.
/// If the user-defined reduction \p DRD has an 'initializer' clause, the
/// initializer call \p InitOp is emitted with its two arguments privatized to
/// \p Private and \p Original. Otherwise the private copy is initialized from
/// a zero-initialized global constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's LHS/RHS placeholder variables onto the private
    // and original storage, then emit the initializer call itself.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: default-initialize from a private constant
    // filled with the null value of Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 DestAddr = 691 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 692 if (DRD) 693 SrcAddr = 694 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 695 696 llvm::Value *SrcBegin = nullptr; 697 if (DRD) 698 SrcBegin = SrcAddr.getPointer(); 699 llvm::Value *DestBegin = DestAddr.getPointer(); 700 // Cast from pointer to array type to pointer to single element. 701 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 702 // The basic structure here is a while-do loop. 703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 705 llvm::Value *IsEmpty = 706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 708 709 // Enter the loop body, making that address the current address. 
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 711 CGF.EmitBlock(BodyBB); 712 713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 714 715 llvm::PHINode *SrcElementPHI = nullptr; 716 Address SrcElementCurrent = Address::invalid(); 717 if (DRD) { 718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 719 "omp.arraycpy.srcElementPast"); 720 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 721 SrcElementCurrent = 722 Address(SrcElementPHI, 723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 724 } 725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 727 DestElementPHI->addIncoming(DestBegin, EntryBB); 728 Address DestElementCurrent = 729 Address(DestElementPHI, 730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 731 732 // Emit copy. 733 { 734 CodeGenFunction::RunCleanupsScope InitScope(CGF); 735 if (EmitDeclareReductionInit) { 736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 737 SrcElementCurrent, ElementTy); 738 } else 739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 740 /*IsInitializer=*/false); 741 } 742 743 if (DRD) { 744 // Shift the address forward by one element. 745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 746 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 748 } 749 750 // Shift the address forward by one element. 751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 752 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 753 // Check whether we've reached the end. 754 llvm::Value *Done = 755 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 756 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 757 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 758 759 // Done. 
760 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 761 } 762 763 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 764 return CGF.EmitOMPSharedLValue(E); 765 } 766 767 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 768 const Expr *E) { 769 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 770 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 771 return LValue(); 772 } 773 774 void ReductionCodeGen::emitAggregateInitialization( 775 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 776 const OMPDeclareReductionDecl *DRD) { 777 // Emit VarDecl with copy init for arrays. 778 // Get the address of the original variable captured in current 779 // captured region. 780 const auto *PrivateVD = 781 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 782 bool EmitDeclareReductionInit = 783 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 784 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 785 EmitDeclareReductionInit, 786 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 787 : PrivateVD->getInit(), 788 DRD, SharedLVal.getAddress(CGF)); 789 } 790 791 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 792 ArrayRef<const Expr *> Origs, 793 ArrayRef<const Expr *> Privates, 794 ArrayRef<const Expr *> ReductionOps) { 795 ClausesData.reserve(Shareds.size()); 796 SharedAddresses.reserve(Shareds.size()); 797 Sizes.reserve(Shareds.size()); 798 BaseDecls.reserve(Shareds.size()); 799 const auto *IOrig = Origs.begin(); 800 const auto *IPriv = Privates.begin(); 801 const auto *IRed = ReductionOps.begin(); 802 for (const Expr *Ref : Shareds) { 803 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 804 std::advance(IOrig, 1); 805 std::advance(IPriv, 1); 806 std::advance(IRed, 1); 807 } 808 } 809 810 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 811 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 812 "Number of generated lvalues must be exactly N."); 813 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 814 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 815 SharedAddresses.emplace_back(First, Second); 816 if (ClausesData[N].Shared == ClausesData[N].Ref) { 817 OrigAddresses.emplace_back(First, Second); 818 } else { 819 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 820 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 821 OrigAddresses.emplace_back(First, Second); 822 } 823 } 824 825 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 826 const auto *PrivateVD = 827 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 828 QualType PrivateType = PrivateVD->getType(); 829 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 830 if (!PrivateType->isVariablyModifiedType()) { 831 Sizes.emplace_back( 832 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 833 nullptr); 834 return; 835 } 836 llvm::Value *Size; 837 llvm::Value 
      *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Number of elements in an array section is (UB - LB) + 1.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the just-computed size so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Record an externally provided element count \p Size for variably-modified
/// reduction item \p N and emit its type; no-op for constant-sized items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N at
/// \p PrivateAddr from the shared copy \p SharedLVal. \p DefaultInit is a
/// callback that may perform default initialization; its result tells whether
/// it already initialized the item.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reduction items get element-wise initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item \p N requires a
/// destructor call.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of reduction item \p N if
/// its type needs destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind =
      PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Load through pointers/references in \p BaseLV until its type matches
/// \p ElTy, then return an lvalue cast to the memory representation of
/// \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Build a chain of temporaries mirroring the pointer/reference levels
/// between \p BaseTy and \p ElTy so that \p Addr can stand in for a value of
/// type \p BaseTy; returns the outermost temporary (or \p Addr itself when no
/// indirection is needed).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each stores the address of the
    // next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Find the base variable of an array-section/array-subscript expression
/// \p Ref, returning its VarDecl and (via \p DE) the underlying DeclRefExpr.
/// Returns nullptr for other expression kinds, leaving \p DE untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust \p PrivateAddr of reduction item \p N so that it points to the same
/// offset inside the private copy as the reduction expression does inside its
/// base variable (relevant for array sections/subscripts).
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the reduction item within its base variable.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item \p N is initialized with a user-defined
/// reduction (declare reduction) initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create a public, non-mutable, non-bitfield field of type \p FieldTy and
/// append it to the record \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase declarations that ended up with no remaining uses.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Build a runtime symbol name from \p Parts, prefixing the first part with
/// FirstSeparator and separating the rest with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the combiner (or initializer, when \p IsCombiner is false) function
/// of a user-defined reduction for type \p Ty, privatizing \p In/\p Out as
/// the function's two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny thunks; force-inline them when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember the UDR per-function so it can be dropped when codegen for
    // this function finishes.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Lazily emit the UDR combiner/initializer on first use.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Emit the outlined function of a 'parallel'/'teams'-like region described
/// by \p D, capturing \p CS; \p ThreadIDVar is the kmp_int32* thread-id
/// parameter of the outlined function.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the region may be cancelled; each combined directive
  // kind carries its own hasCancel() accessor.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else
      if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task so the
  // remaining parts get scheduled.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the constants in \p Data as fields of record \p RD to \p Fields,
/// filling alignment gaps in the LLVM struct layout with zero initializers.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD,
const CGRecordLayout &RL, 1351 ArrayRef<llvm::Constant *> Data) { 1352 llvm::StructType *StructTy = RL.getLLVMType(); 1353 unsigned PrevIdx = 0; 1354 ConstantInitBuilder CIBuilder(CGM); 1355 auto DI = Data.begin(); 1356 for (const FieldDecl *FD : RD->fields()) { 1357 unsigned Idx = RL.getLLVMFieldNo(FD); 1358 // Fill the alignment. 1359 for (unsigned I = PrevIdx; I < Idx; ++I) 1360 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1361 PrevIdx = Idx + 1; 1362 Fields.add(*DI); 1363 ++DI; 1364 } 1365 } 1366 1367 template <class... As> 1368 static llvm::GlobalVariable * 1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1370 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1371 As &&... Args) { 1372 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1373 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1374 ConstantInitBuilder CIBuilder(CGM); 1375 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1376 buildStructValue(Fields, CGM, RD, RL, Data); 1377 return Fields.finishAndCreateGlobal( 1378 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1379 std::forward<As>(Args)...); 1380 } 1381 1382 template <typename T> 1383 static void 1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1385 ArrayRef<llvm::Constant *> Data, 1386 T &Parent) { 1387 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1388 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1389 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1390 buildStructValue(Fields, CGM, RD, RL, Data); 1391 Fields.finishAndAddTo(Parent); 1392 } 1393 1394 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1395 bool AtCurrentPoint) { 1396 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1397 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1398 1399 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
1400 if (AtCurrentPoint) { 1401 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1402 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1403 } else { 1404 Elem.second.ServiceInsertPt = 1405 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1406 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1407 } 1408 } 1409 1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1411 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1412 if (Elem.second.ServiceInsertPt) { 1413 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1414 Elem.second.ServiceInsertPt = nullptr; 1415 Ptr->eraseFromParent(); 1416 } 1417 } 1418 1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1420 SourceLocation Loc, 1421 SmallString<128> &Buffer) { 1422 llvm::raw_svector_ostream OS(Buffer); 1423 // Build debug location 1424 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1425 OS << ";" << PLoc.getFilename() << ";"; 1426 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1427 OS << FD->getQualifiedNameAsString(); 1428 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1429 return OS.str(); 1430 } 1431 1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1433 SourceLocation Loc, 1434 unsigned Flags) { 1435 llvm::Constant *SrcLocStr; 1436 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1437 Loc.isInvalid()) { 1438 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1439 } else { 1440 std::string FunctionName = ""; 1441 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1442 FunctionName = FD->getQualifiedNameAsString(); 1443 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1444 const char *FileName = PLoc.getFilename(); 1445 unsigned Line = PLoc.getLine(); 1446 unsigned Column = PLoc.getColumn(); 1447 SrcLocStr = 
        OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                        Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when the load cannot be separated from its
      // definition by an exceptional edge.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function OpenMP state when codegen of CGF.CurFn finishes.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Return (creating on first use) the __kmpc_for_static_init_{4,4u,8,8u}
/// runtime entry matching \p IVSize and \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return (creating on first use) the __kmpc_dispatch_init_{4,4u,8,8u}
/// runtime entry matching \p IVSize and \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return (creating on first use) the __kmpc_dispatch_fini_{4,4u,8,8u}
/// runtime entry matching \p IVSize and \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return (creating on first use) the __kmpc_dispatch_next_{4,4u,8,8u}
/// runtime entry matching \p IVSize and \p IVSigned.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1632 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1633 llvm::Type *TypeParams[] = { 1634 getIdentTyPointerTy(), // loc 1635 CGM.Int32Ty, // tid 1636 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1637 PtrTy, // p_lower 1638 PtrTy, // p_upper 1639 PtrTy // p_stride 1640 }; 1641 auto *FnTy = 1642 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1643 return CGM.CreateRuntimeFunction(FnTy, Name); 1644 } 1645 1646 /// Obtain information that uniquely identifies a target entry. This 1647 /// consists of the file and device IDs as well as line number associated with 1648 /// the relevant entry source location. 1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1650 unsigned &DeviceID, unsigned &FileID, 1651 unsigned &LineNum) { 1652 SourceManager &SM = C.getSourceManager(); 1653 1654 // The loc should be always valid and have a file ID (the user cannot use 1655 // #pragma directives in macros) 1656 1657 assert(Loc.isValid() && "Source location is expected to be always valid."); 1658 1659 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1660 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1661 1662 llvm::sys::fs::UniqueID ID; 1663 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1664 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1666 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1667 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1668 << PLoc.getFilename() << EC.message(); 1669 } 1670 1671 DeviceID = ID.getDevice(); 1672 FileID = ID.getFile(); 1673 LineNum = PLoc.getLine(); 1674 } 1675 1676 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1677 if (CGM.getLangOpts().OpenMPSimd) 1678 return Address::invalid(); 1679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1680 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1681 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1682 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1683 HasRequiresUnifiedSharedMemory))) { 1684 SmallString<64> PtrName; 1685 { 1686 llvm::raw_svector_ostream OS(PtrName); 1687 OS << CGM.getMangledName(GlobalDecl(VD)); 1688 if (!VD->isExternallyVisible()) { 1689 unsigned DeviceID, FileID, Line; 1690 getTargetEntryUniqueInfo(CGM.getContext(), 1691 VD->getCanonicalDecl()->getBeginLoc(), 1692 DeviceID, FileID, Line); 1693 OS << llvm::format("_%x", FileID); 1694 } 1695 OS << "_decl_tgt_ref_ptr"; 1696 } 1697 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1698 if (!Ptr) { 1699 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1700 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1701 PtrName); 1702 1703 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1704 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1705 1706 if (!CGM.getLangOpts().OpenMPIsDevice) 1707 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1708 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1709 } 1710 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1711 } 1712 return Address::invalid(); 1713 } 1714 1715 llvm::Constant * 1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1717 assert(!CGM.getLangOpts().OpenMPUseTLS || 1718 !CGM.getContext().getTargetInfo().isTLSSupported()); 1719 // Lookup the entry, lazily creating it if necessary. 
1720 std::string Suffix = getName({"cache", ""}); 1721 return getOrCreateInternalVariable( 1722 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1723 } 1724 1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1726 const VarDecl *VD, 1727 Address VDAddr, 1728 SourceLocation Loc) { 1729 if (CGM.getLangOpts().OpenMPUseTLS && 1730 CGM.getContext().getTargetInfo().isTLSSupported()) 1731 return VDAddr; 1732 1733 llvm::Type *VarTy = VDAddr.getElementType(); 1734 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1735 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1736 CGM.Int8PtrTy), 1737 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1738 getOrCreateThreadPrivateCache(VD)}; 1739 return Address(CGF.EmitRuntimeCall( 1740 OMPBuilder.getOrCreateRuntimeFunction( 1741 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1742 Args), 1743 VDAddr.getAlignment()); 1744 } 1745 1746 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1747 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1748 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1749 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1750 // library. 1751 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1753 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1754 OMPLoc); 1755 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1756 // to register constructor/destructor for variable. 
1757 llvm::Value *Args[] = { 1758 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1759 Ctor, CopyCtor, Dtor}; 1760 CGF.EmitRuntimeCall( 1761 OMPBuilder.getOrCreateRuntimeFunction( 1762 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1763 Args); 1764 } 1765 1766 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1767 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1768 bool PerformInit, CodeGenFunction *CGF) { 1769 if (CGM.getLangOpts().OpenMPUseTLS && 1770 CGM.getContext().getTargetInfo().isTLSSupported()) 1771 return nullptr; 1772 1773 VD = VD->getDefinition(CGM.getContext()); 1774 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1775 QualType ASTTy = VD->getType(); 1776 1777 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1778 const Expr *Init = VD->getAnyInitializer(); 1779 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1780 // Generate function that re-emits the declaration's initializer into the 1781 // threadprivate copy of the variable VD 1782 CodeGenFunction CtorCGF(CGM); 1783 FunctionArgList Args; 1784 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1785 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1786 ImplicitParamDecl::Other); 1787 Args.push_back(&Dst); 1788 1789 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1790 CGM.getContext().VoidPtrTy, Args); 1791 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1792 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1793 llvm::Function *Fn = 1794 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1795 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1796 Args, Loc, Loc); 1797 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1798 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1799 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1800 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1801 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1802 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1803 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1804 /*IsInitializer=*/true); 1805 ArgVal = CtorCGF.EmitLoadOfScalar( 1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1807 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1808 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1809 CtorCGF.FinishFunction(); 1810 Ctor = Fn; 1811 } 1812 if (VD->getType().isDestructedType() != QualType::DK_none) { 1813 // Generate function that emits destructor call for the threadprivate copy 1814 // of the variable VD 1815 CodeGenFunction DtorCGF(CGM); 1816 FunctionArgList Args; 1817 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1818 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1819 ImplicitParamDecl::Other); 1820 Args.push_back(&Dst); 1821 1822 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1823 CGM.getContext().VoidTy, Args); 1824 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1825 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1826 llvm::Function *Fn = 1827 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1828 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1829 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1830 Loc, Loc); 1831 // Create a scope with an artificial location for the body of this function. 1832 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1833 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1834 DtorCGF.GetAddrOfLocalVar(&Dst), 1835 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1836 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1837 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1838 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1839 DtorCGF.FinishFunction(); 1840 Dtor = Fn; 1841 } 1842 // Do not emit init function if it is not required. 
1843 if (!Ctor && !Dtor) 1844 return nullptr; 1845 1846 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1847 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1848 /*isVarArg=*/false) 1849 ->getPointerTo(); 1850 // Copying constructor for the threadprivate variable. 1851 // Must be NULL - reserved by runtime, but currently it requires that this 1852 // parameter is always NULL. Otherwise it fires assertion. 1853 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1854 if (Ctor == nullptr) { 1855 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1856 /*isVarArg=*/false) 1857 ->getPointerTo(); 1858 Ctor = llvm::Constant::getNullValue(CtorTy); 1859 } 1860 if (Dtor == nullptr) { 1861 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Dtor = llvm::Constant::getNullValue(DtorTy); 1865 } 1866 if (!CGF) { 1867 auto *InitFunctionTy = 1868 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1869 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1870 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1871 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1872 CodeGenFunction InitCGF(CGM); 1873 FunctionArgList ArgList; 1874 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1875 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1876 Loc, Loc); 1877 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 InitCGF.FinishFunction(); 1879 return InitFunction; 1880 } 1881 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1882 } 1883 return nullptr; 1884 } 1885 1886 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1887 llvm::GlobalVariable *Addr, 1888 bool PerformInit) { 1889 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1890 !CGM.getLangOpts().OpenMPIsDevice) 1891 return false; 1892 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1893 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1894 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1895 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1896 HasRequiresUnifiedSharedMemory)) 1897 return CGM.getLangOpts().OpenMPIsDevice; 1898 VD = VD->getDefinition(CGM.getContext()); 1899 assert(VD && "Unknown VarDecl"); 1900 1901 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1902 return CGM.getLangOpts().OpenMPIsDevice; 1903 1904 QualType ASTTy = VD->getType(); 1905 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1906 1907 // Produce the unique prefix to identify the new target regions. We use 1908 // the source location of the variable declaration which we know to not 1909 // conflict with any target region. 1910 unsigned DeviceID; 1911 unsigned FileID; 1912 unsigned Line; 1913 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1914 SmallString<128> Buffer, Out; 1915 { 1916 llvm::raw_svector_ostream OS(Buffer); 1917 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1918 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1919 } 1920 1921 const Expr *Init = VD->getAnyInitializer(); 1922 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1923 llvm::Constant *Ctor; 1924 llvm::Constant *ID; 1925 if (CGM.getLangOpts().OpenMPIsDevice) { 1926 // Generate function that re-emits the declaration's initializer into 1927 // the threadprivate copy of the variable VD 1928 CodeGenFunction CtorCGF(CGM); 1929 1930 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1931 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1932 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1933 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1934 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1935 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1936 FunctionArgList(), Loc, Loc); 1937 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1938 CtorCGF.EmitAnyExprToMem(Init, 1939 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1940 Init->getType().getQualifiers(), 1941 /*IsInitializer=*/true); 1942 CtorCGF.FinishFunction(); 1943 Ctor = Fn; 1944 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1945 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1946 } else { 1947 Ctor = new llvm::GlobalVariable( 1948 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1949 llvm::GlobalValue::PrivateLinkage, 1950 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1951 ID = Ctor; 1952 } 1953 1954 // Register the information for the entry associated with the constructor. 1955 Out.clear(); 1956 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1957 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1958 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1959 } 1960 if (VD->getType().isDestructedType() != QualType::DK_none) { 1961 llvm::Constant *Dtor; 1962 llvm::Constant *ID; 1963 if (CGM.getLangOpts().OpenMPIsDevice) { 1964 // Generate function that emits destructor call for the threadprivate 1965 // copy of the variable VD 1966 CodeGenFunction DtorCGF(CGM); 1967 1968 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1969 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1970 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1971 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1972 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1973 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1974 FunctionArgList(), Loc, Loc); 1975 // Create a scope with an artificial location for the body of this 1976 // function. 
1977 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1978 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1979 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1980 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1981 DtorCGF.FinishFunction(); 1982 Dtor = Fn; 1983 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1984 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1985 } else { 1986 Dtor = new llvm::GlobalVariable( 1987 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1988 llvm::GlobalValue::PrivateLinkage, 1989 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1990 ID = Dtor; 1991 } 1992 // Register the information for the entry associated with the destructor. 1993 Out.clear(); 1994 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1995 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1996 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1997 } 1998 return CGM.getLangOpts().OpenMPIsDevice; 1999 } 2000 2001 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2002 QualType VarType, 2003 StringRef Name) { 2004 std::string Suffix = getName({"artificial", ""}); 2005 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2006 llvm::Value *GAddr = 2007 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2008 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2009 CGM.getTarget().isTLSSupported()) { 2010 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2011 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2012 } 2013 std::string CacheSuffix = getName({"cache", ""}); 2014 llvm::Value *Args[] = { 2015 emitUpdateLocation(CGF, SourceLocation()), 2016 getThreadID(CGF, SourceLocation()), 2017 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2018 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2019 /*isSigned=*/false), 2020 
getOrCreateInternalVariable( 2021 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2022 return Address( 2023 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2024 CGF.EmitRuntimeCall( 2025 OMPBuilder.getOrCreateRuntimeFunction( 2026 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2027 Args), 2028 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2029 CGM.getContext().getTypeAlignInChars(VarType)); 2030 } 2031 2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2033 const RegionCodeGenTy &ThenGen, 2034 const RegionCodeGenTy &ElseGen) { 2035 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2036 2037 // If the condition constant folds and can be elided, try to avoid emitting 2038 // the condition and the dead arm of the if/else. 2039 bool CondConstant; 2040 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2041 if (CondConstant) 2042 ThenGen(CGF); 2043 else 2044 ElseGen(CGF); 2045 return; 2046 } 2047 2048 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2049 // emit the conditional branch. 2050 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2051 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2052 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2053 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2054 2055 // Emit the 'then' code. 2056 CGF.EmitBlock(ThenBlock); 2057 ThenGen(CGF); 2058 CGF.EmitBranch(ContBlock); 2059 // Emit the 'else' code if present. 2060 // There is no need to emit line number for unconditional branch. 2061 (void)ApplyDebugLocation::CreateEmpty(CGF); 2062 CGF.EmitBlock(ElseBlock); 2063 ElseGen(CGF); 2064 // There is no need to emit line number for unconditional branch. 2065 (void)ApplyDebugLocation::CreateEmpty(CGF); 2066 CGF.EmitBranch(ContBlock); 2067 // Emit the continuation block for code after the if. 
2068 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2069 } 2070 2071 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2072 llvm::Function *OutlinedFn, 2073 ArrayRef<llvm::Value *> CapturedVars, 2074 const Expr *IfCond) { 2075 if (!CGF.HaveInsertPoint()) 2076 return; 2077 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2078 auto &M = CGM.getModule(); 2079 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2082 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2083 llvm::Value *Args[] = { 2084 RTLoc, 2085 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2086 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2087 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2088 RealArgs.append(std::begin(Args), std::end(Args)); 2089 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2090 2091 llvm::FunctionCallee RTLFn = 2092 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2093 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2094 }; 2095 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2096 this](CodeGenFunction &CGF, PrePostActionTy &) { 2097 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2098 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2099 // Build calls: 2100 // __kmpc_serialized_parallel(&Loc, GTid); 2101 llvm::Value *Args[] = {RTLoc, ThreadID}; 2102 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2103 M, OMPRTL___kmpc_serialized_parallel), 2104 Args); 2105 2106 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2107 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2108 Address ZeroAddrBound = 2109 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2110 /*Name=*/".bound.zero.addr"); 2111 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2112 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2113 // ThreadId 
for serialized parallels is 0. 2114 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2115 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2116 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2117 2118 // Ensure we do not inline the function. This is trivially true for the ones 2119 // passed to __kmpc_fork_call but the ones calles in serialized regions 2120 // could be inlined. This is not a perfect but it is closer to the invariant 2121 // we want, namely, every data environment starts with a new function. 2122 // TODO: We should pass the if condition to the runtime function and do the 2123 // handling there. Much cleaner code. 2124 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2125 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2126 2127 // __kmpc_end_serialized_parallel(&Loc, GTid); 2128 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2129 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2130 M, OMPRTL___kmpc_end_serialized_parallel), 2131 EndArgs); 2132 }; 2133 if (IfCond) { 2134 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2135 } else { 2136 RegionCodeGenTy ThenRCG(ThenGen); 2137 ThenRCG(CGF); 2138 } 2139 } 2140 2141 // If we're inside an (outlined) parallel region, use the region info's 2142 // thread-ID variable (it is passed in a first argument of the outlined function 2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2144 // regular serial code region, get thread ID by calling kmp_int32 2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2146 // return the address of that temp. 
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2148 SourceLocation Loc) { 2149 if (auto *OMPRegionInfo = 2150 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 if (OMPRegionInfo->getThreadIDVariable()) 2152 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2153 2154 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2155 QualType Int32Ty = 2156 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2157 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2158 CGF.EmitStoreOfScalar(ThreadID, 2159 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2160 2161 return ThreadIDTemp; 2162 } 2163 2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2165 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2166 SmallString<256> Buffer; 2167 llvm::raw_svector_ostream Out(Buffer); 2168 Out << Name; 2169 StringRef RuntimeName = Out.str(); 2170 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2171 if (Elem.second) { 2172 assert(Elem.second->getType()->getPointerElementType() == Ty && 2173 "OMP internal variable has different type than requested"); 2174 return &*Elem.second; 2175 } 2176 2177 return Elem.second = new llvm::GlobalVariable( 2178 CGM.getModule(), Ty, /*IsConstant*/ false, 2179 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2180 Elem.first(), /*InsertBefore=*/nullptr, 2181 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2182 } 2183 2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2185 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2186 std::string Name = getName({Prefix, "var"}); 2187 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2188 } 2189 2190 namespace { 2191 /// Common pre(post)-action for different OpenMP constructs. 
/// Emits an "enter" runtime call before the region body and an "exit" call
/// after it. With \p Conditional set, the body (and exit call) only run when
/// the enter call returns non-zero; the caller must then invoke Done() to
/// close the branch.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): only meaningful after a Conditional Enter() — otherwise
  // ContBlock is null; callers below only invoke Done() in that mode.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emits a 'critical' region protected by the named lock; an optional 'hint'
/// expression selects __kmpc_critical_with_hint.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emits a 'master' region: the body runs only on the thread for which
/// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emits a 'masked' region; the filter expression (thread id 0 when absent)
/// picks which thread executes the body.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

/// Emits a 'taskyield' call (via the OpenMPIRBuilder when enabled), plus the
/// untied-task switch bookkeeping when inside an OpenMP region.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits a 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emits the helper "void copy_func(void *LHSArg, void *RHSArg)" used by
/// __kmpc_copyprivate: both arguments are arrays of void* (cast to
/// \p ArgsType) and each destination element is assigned from the matching
/// source element via the corresponding AssignmentOps expression.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emits a 'single' region and, when copyprivate clauses are present, a
/// __kmpc_copyprivate broadcast of the listed variables from the executing
/// thread to its team, guarded by a did_it flag.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs here feed the callee's DestExprs/SrcExprs
    // parameters respectively — presumably intentional given the Sema-side
    // naming of these lists; confirm against the clause emission.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
OMPRTL___kmpc_ordered), 2520 Args, 2521 OMPBuilder.getOrCreateRuntimeFunction( 2522 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2523 Args); 2524 OrderedOpGen.setAction(Action); 2525 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2526 return; 2527 } 2528 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2529 } 2530 2531 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2532 unsigned Flags; 2533 if (Kind == OMPD_for) 2534 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2535 else if (Kind == OMPD_sections) 2536 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2537 else if (Kind == OMPD_single) 2538 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2539 else if (Kind == OMPD_barrier) 2540 Flags = OMP_IDENT_BARRIER_EXPL; 2541 else 2542 Flags = OMP_IDENT_BARRIER_IMPL; 2543 return Flags; 2544 } 2545 2546 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2547 CodeGenFunction &CGF, const OMPLoopDirective &S, 2548 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2549 // Check if the loop directive is actually a doacross loop directive. In this 2550 // case choose static, 1 schedule. 2551 if (llvm::any_of( 2552 S.getClausesOfKind<OMPOrderedClause>(), 2553 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2554 ScheduleKind = OMPC_SCHEDULE_static; 2555 // Chunk size is 1 in this case. 
2556 llvm::APInt ChunkSize(32, 1); 2557 ChunkExpr = IntegerLiteral::Create( 2558 CGF.getContext(), ChunkSize, 2559 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2560 SourceLocation()); 2561 } 2562 } 2563 2564 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2565 OpenMPDirectiveKind Kind, bool EmitChecks, 2566 bool ForceSimpleCall) { 2567 // Check if we should use the OMPBuilder 2568 auto *OMPRegionInfo = 2569 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2570 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2571 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2572 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2573 return; 2574 } 2575 2576 if (!CGF.HaveInsertPoint()) 2577 return; 2578 // Build call __kmpc_cancel_barrier(loc, thread_id); 2579 // Build call __kmpc_barrier(loc, thread_id); 2580 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2581 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2582 // thread_id); 2583 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2584 getThreadID(CGF, Loc)}; 2585 if (OMPRegionInfo) { 2586 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2587 llvm::Value *Result = CGF.EmitRuntimeCall( 2588 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2589 OMPRTL___kmpc_cancel_barrier), 2590 Args); 2591 if (EmitChecks) { 2592 // if (__kmpc_cancel_barrier()) { 2593 // exit from construct; 2594 // } 2595 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2596 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2597 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2598 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2599 CGF.EmitBlock(ExitBB); 2600 // exit from construct; 2601 CodeGenFunction::JumpDest CancelDestination = 2602 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2603 CGF.EmitBranchThroughCleanup(CancelDestination); 2604 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2605 } 2606 
return; 2607 } 2608 } 2609 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2610 CGM.getModule(), OMPRTL___kmpc_barrier), 2611 Args); 2612 } 2613 2614 /// Map the OpenMP loop schedule to the runtime enumeration. 2615 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2616 bool Chunked, bool Ordered) { 2617 switch (ScheduleKind) { 2618 case OMPC_SCHEDULE_static: 2619 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2620 : (Ordered ? OMP_ord_static : OMP_sch_static); 2621 case OMPC_SCHEDULE_dynamic: 2622 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2623 case OMPC_SCHEDULE_guided: 2624 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2625 case OMPC_SCHEDULE_runtime: 2626 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2627 case OMPC_SCHEDULE_auto: 2628 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2629 case OMPC_SCHEDULE_unknown: 2630 assert(!Chunked && "chunk was specified but schedule kind not known"); 2631 return Ordered ? OMP_ord_static : OMP_sch_static; 2632 } 2633 llvm_unreachable("Unexpected runtime schedule"); 2634 } 2635 2636 /// Map the OpenMP distribute schedule to the runtime enumeration. 2637 static OpenMPSchedType 2638 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2639 // only static is allowed for dist_schedule 2640 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2641 } 2642 2643 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2644 bool Chunked) const { 2645 OpenMPSchedType Schedule = 2646 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2647 return Schedule == OMP_sch_static; 2648 } 2649 2650 bool CGOpenMPRuntime::isStaticNonchunked( 2651 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2652 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2653 return Schedule == OMP_dist_sch_static; 2654 } 2655 2656 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2657 bool Chunked) const { 2658 OpenMPSchedType Schedule = 2659 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2660 return Schedule == OMP_sch_static_chunked; 2661 } 2662 2663 bool CGOpenMPRuntime::isStaticChunked( 2664 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2665 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2666 return Schedule == OMP_dist_sch_static_chunked; 2667 } 2668 2669 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2670 OpenMPSchedType Schedule = 2671 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2672 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2673 return Schedule != OMP_sch_static; 2674 } 2675 2676 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2677 OpenMPScheduleClauseModifier M1, 2678 OpenMPScheduleClauseModifier M2) { 2679 int Modifier = 0; 2680 switch (M1) { 2681 case OMPC_SCHEDULE_MODIFIER_monotonic: 2682 Modifier = OMP_sch_modifier_monotonic; 2683 break; 2684 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2685 Modifier = OMP_sch_modifier_nonmonotonic; 2686 break; 2687 case OMPC_SCHEDULE_MODIFIER_simd: 2688 if (Schedule == OMP_sch_static_chunked) 2689 Schedule = OMP_sch_static_balanced_chunked; 2690 break; 2691 case 
OMPC_SCHEDULE_MODIFIER_last: 2692 case OMPC_SCHEDULE_MODIFIER_unknown: 2693 break; 2694 } 2695 switch (M2) { 2696 case OMPC_SCHEDULE_MODIFIER_monotonic: 2697 Modifier = OMP_sch_modifier_monotonic; 2698 break; 2699 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2700 Modifier = OMP_sch_modifier_nonmonotonic; 2701 break; 2702 case OMPC_SCHEDULE_MODIFIER_simd: 2703 if (Schedule == OMP_sch_static_chunked) 2704 Schedule = OMP_sch_static_balanced_chunked; 2705 break; 2706 case OMPC_SCHEDULE_MODIFIER_last: 2707 case OMPC_SCHEDULE_MODIFIER_unknown: 2708 break; 2709 } 2710 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2711 // If the static schedule kind is specified or if the ordered clause is 2712 // specified, and if the nonmonotonic modifier is not specified, the effect is 2713 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2714 // modifier is specified, the effect is as if the nonmonotonic modifier is 2715 // specified. 2716 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2717 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2718 Schedule == OMP_sch_static_balanced_chunked || 2719 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2720 Schedule == OMP_dist_sch_static_chunked || 2721 Schedule == OMP_dist_sch_static)) 2722 Modifier = OMP_sch_modifier_nonmonotonic; 2723 } 2724 return Schedule | Modifier; 2725 } 2726 2727 void CGOpenMPRuntime::emitForDispatchInit( 2728 CodeGenFunction &CGF, SourceLocation Loc, 2729 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2730 bool Ordered, const DispatchRTInput &DispatchValues) { 2731 if (!CGF.HaveInsertPoint()) 2732 return; 2733 OpenMPSchedType Schedule = getRuntimeSchedule( 2734 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2735 assert(Ordered || 2736 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2737 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2738 
Schedule != OMP_sch_static_balanced_chunked)); 2739 // Call __kmpc_dispatch_init( 2740 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2741 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2742 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2743 2744 // If the Chunk was not specified in the clause - use default value 1. 2745 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2746 : CGF.Builder.getIntN(IVSize, 1); 2747 llvm::Value *Args[] = { 2748 emitUpdateLocation(CGF, Loc), 2749 getThreadID(CGF, Loc), 2750 CGF.Builder.getInt32(addMonoNonMonoModifier( 2751 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2752 DispatchValues.LB, // Lower 2753 DispatchValues.UB, // Upper 2754 CGF.Builder.getIntN(IVSize, 1), // Stride 2755 Chunk // Chunk 2756 }; 2757 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2758 } 2759 2760 static void emitForStaticInitCall( 2761 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2762 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2763 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2764 const CGOpenMPRuntime::StaticRTInput &Values) { 2765 if (!CGF.HaveInsertPoint()) 2766 return; 2767 2768 assert(!Values.Ordered); 2769 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2770 Schedule == OMP_sch_static_balanced_chunked || 2771 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2772 Schedule == OMP_dist_sch_static || 2773 Schedule == OMP_dist_sch_static_chunked); 2774 2775 // Call __kmpc_for_static_init( 2776 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2777 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2778 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2779 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2780 llvm::Value *Chunk = Values.Chunk; 2781 if (Chunk == nullptr) { 2782 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2783 Schedule == 
OMP_dist_sch_static) && 2784 "expected static non-chunked schedule"); 2785 // If the Chunk was not specified in the clause - use default value 1. 2786 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2787 } else { 2788 assert((Schedule == OMP_sch_static_chunked || 2789 Schedule == OMP_sch_static_balanced_chunked || 2790 Schedule == OMP_ord_static_chunked || 2791 Schedule == OMP_dist_sch_static_chunked) && 2792 "expected static chunked schedule"); 2793 } 2794 llvm::Value *Args[] = { 2795 UpdateLocation, 2796 ThreadId, 2797 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2798 M2)), // Schedule type 2799 Values.IL.getPointer(), // &isLastIter 2800 Values.LB.getPointer(), // &LB 2801 Values.UB.getPointer(), // &UB 2802 Values.ST.getPointer(), // &Stride 2803 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2804 Chunk // Chunk 2805 }; 2806 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2807 } 2808 2809 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2810 SourceLocation Loc, 2811 OpenMPDirectiveKind DKind, 2812 const OpenMPScheduleTy &ScheduleKind, 2813 const StaticRTInput &Values) { 2814 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2815 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2816 assert(isOpenMPWorksharingDirective(DKind) && 2817 "Expected loop-based or sections-based directive."); 2818 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2819 isOpenMPLoopDirective(DKind) 2820 ? 
OMP_IDENT_WORK_LOOP 2821 : OMP_IDENT_WORK_SECTIONS); 2822 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2823 llvm::FunctionCallee StaticInitFunction = 2824 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2825 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2826 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2827 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2828 } 2829 2830 void CGOpenMPRuntime::emitDistributeStaticInit( 2831 CodeGenFunction &CGF, SourceLocation Loc, 2832 OpenMPDistScheduleClauseKind SchedKind, 2833 const CGOpenMPRuntime::StaticRTInput &Values) { 2834 OpenMPSchedType ScheduleNum = 2835 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2836 llvm::Value *UpdatedLocation = 2837 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2838 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2839 llvm::FunctionCallee StaticInitFunction = 2840 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2841 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2842 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2843 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2844 } 2845 2846 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2847 SourceLocation Loc, 2848 OpenMPDirectiveKind DKind) { 2849 if (!CGF.HaveInsertPoint()) 2850 return; 2851 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2852 llvm::Value *Args[] = { 2853 emitUpdateLocation(CGF, Loc, 2854 isOpenMPDistributeDirective(DKind) 2855 ? OMP_IDENT_WORK_DISTRIBUTE 2856 : isOpenMPLoopDirective(DKind) 2857 ? 
OMP_IDENT_WORK_LOOP 2858 : OMP_IDENT_WORK_SECTIONS), 2859 getThreadID(CGF, Loc)}; 2860 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2861 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2862 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2863 Args); 2864 } 2865 2866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2867 SourceLocation Loc, 2868 unsigned IVSize, 2869 bool IVSigned) { 2870 if (!CGF.HaveInsertPoint()) 2871 return; 2872 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2873 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2874 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2875 } 2876 2877 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2878 SourceLocation Loc, unsigned IVSize, 2879 bool IVSigned, Address IL, 2880 Address LB, Address UB, 2881 Address ST) { 2882 // Call __kmpc_dispatch_next( 2883 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2884 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2885 // kmp_int[32|64] *p_stride); 2886 llvm::Value *Args[] = { 2887 emitUpdateLocation(CGF, Loc), 2888 getThreadID(CGF, Loc), 2889 IL.getPointer(), // &isLastIter 2890 LB.getPointer(), // &Lower 2891 UB.getPointer(), // &Upper 2892 ST.getPointer() // &Stride 2893 }; 2894 llvm::Value *Call = 2895 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2896 return CGF.EmitScalarConversion( 2897 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2898 CGF.getContext().BoolTy, Loc); 2899 } 2900 2901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2902 llvm::Value *NumThreads, 2903 SourceLocation Loc) { 2904 if (!CGF.HaveInsertPoint()) 2905 return; 2906 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2907 llvm::Value *Args[] = { 2908 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2909 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2910 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2911 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2912 Args); 2913 } 2914 2915 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2916 ProcBindKind ProcBind, 2917 SourceLocation Loc) { 2918 if (!CGF.HaveInsertPoint()) 2919 return; 2920 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2921 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2922 llvm::Value *Args[] = { 2923 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2924 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2925 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2926 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2927 Args); 2928 } 2929 2930 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2931 SourceLocation Loc, llvm::AtomicOrdering AO) { 2932 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2933 OMPBuilder.createFlush(CGF.Builder); 2934 } else { 2935 if (!CGF.HaveInsertPoint()) 2936 return; 2937 // Build call void __kmpc_flush(ident_t *loc) 2938 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2939 CGM.getModule(), OMPRTL___kmpc_flush), 2940 emitUpdateLocation(CGF, Loc)); 2941 } 2942 } 2943 2944 namespace { 2945 /// Indexes of fields for type kmp_task_t. 2946 enum KmpTaskTFields { 2947 /// List of shared variables. 2948 KmpTaskTShareds, 2949 /// Task routine. 2950 KmpTaskTRoutine, 2951 /// Partition id for the untied tasks. 2952 KmpTaskTPartId, 2953 /// Function with call of destructors for private variables. 2954 Data1, 2955 /// Task priority. 2956 Data2, 2957 /// (Taskloops only) Lower bound. 2958 KmpTaskTLowerBound, 2959 /// (Taskloops only) Upper bound. 2960 KmpTaskTUpperBound, 2961 /// (Taskloops only) Stride. 2962 KmpTaskTStride, 2963 /// (Taskloops only) Is last iteration flag. 2964 KmpTaskTLastIter, 2965 /// (Taskloops only) Reduction data. 
2966 KmpTaskTReductions, 2967 }; 2968 } // anonymous namespace 2969 2970 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2971 return OffloadEntriesTargetRegion.empty() && 2972 OffloadEntriesDeviceGlobalVar.empty(); 2973 } 2974 2975 /// Initialize target region entry. 2976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2977 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2978 StringRef ParentName, unsigned LineNum, 2979 unsigned Order) { 2980 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2981 "only required for the device " 2982 "code generation."); 2983 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2984 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2985 OMPTargetRegionEntryTargetRegion); 2986 ++OffloadingEntriesNum; 2987 } 2988 2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2990 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2991 StringRef ParentName, unsigned LineNum, 2992 llvm::Constant *Addr, llvm::Constant *ID, 2993 OMPTargetRegionEntryKind Flags) { 2994 // If we are emitting code for a target, the entry is already initialized, 2995 // only has to be registered. 2996 if (CGM.getLangOpts().OpenMPIsDevice) { 2997 // This could happen if the device compilation is invoked standalone. 
2998 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2999 return; 3000 auto &Entry = 3001 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3002 Entry.setAddress(Addr); 3003 Entry.setID(ID); 3004 Entry.setFlags(Flags); 3005 } else { 3006 if (Flags == 3007 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3008 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3009 /*IgnoreAddressId*/ true)) 3010 return; 3011 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3012 "Target region entry already registered!"); 3013 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3014 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3015 ++OffloadingEntriesNum; 3016 } 3017 } 3018 3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3020 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3021 bool IgnoreAddressId) const { 3022 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3023 if (PerDevice == OffloadEntriesTargetRegion.end()) 3024 return false; 3025 auto PerFile = PerDevice->second.find(FileID); 3026 if (PerFile == PerDevice->second.end()) 3027 return false; 3028 auto PerParentName = PerFile->second.find(ParentName); 3029 if (PerParentName == PerFile->second.end()) 3030 return false; 3031 auto PerLine = PerParentName->second.find(LineNum); 3032 if (PerLine == PerParentName->second.end()) 3033 return false; 3034 // Fail if this entry is already registered. 3035 if (!IgnoreAddressId && 3036 (PerLine->second.getAddress() || PerLine->second.getID())) 3037 return false; 3038 return true; 3039 } 3040 3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3042 const OffloadTargetRegionEntryInfoActTy &Action) { 3043 // Scan all target region entries and perform the provided action. 
3044 for (const auto &D : OffloadEntriesTargetRegion) 3045 for (const auto &F : D.second) 3046 for (const auto &P : F.second) 3047 for (const auto &L : P.second) 3048 Action(D.first, F.first, P.first(), L.first, L.second); 3049 } 3050 3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3052 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3053 OMPTargetGlobalVarEntryKind Flags, 3054 unsigned Order) { 3055 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3056 "only required for the device " 3057 "code generation."); 3058 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3059 ++OffloadingEntriesNum; 3060 } 3061 3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3063 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3064 CharUnits VarSize, 3065 OMPTargetGlobalVarEntryKind Flags, 3066 llvm::GlobalValue::LinkageTypes Linkage) { 3067 if (CGM.getLangOpts().OpenMPIsDevice) { 3068 // This could happen if the device compilation is invoked standalone. 
3069 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3070 return; 3071 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3072 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3073 if (Entry.getVarSize().isZero()) { 3074 Entry.setVarSize(VarSize); 3075 Entry.setLinkage(Linkage); 3076 } 3077 return; 3078 } 3079 Entry.setVarSize(VarSize); 3080 Entry.setLinkage(Linkage); 3081 Entry.setAddress(Addr); 3082 } else { 3083 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3084 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3085 assert(Entry.isValid() && Entry.getFlags() == Flags && 3086 "Entry not initialized!"); 3087 if (Entry.getVarSize().isZero()) { 3088 Entry.setVarSize(VarSize); 3089 Entry.setLinkage(Linkage); 3090 } 3091 return; 3092 } 3093 OffloadEntriesDeviceGlobalVar.try_emplace( 3094 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3095 ++OffloadingEntriesNum; 3096 } 3097 } 3098 3099 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3100 actOnDeviceGlobalVarEntriesInfo( 3101 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3102 // Scan all target region entries and perform the provided action. 3103 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3104 Action(E.getKey(), E.getValue()); 3105 } 3106 3107 void CGOpenMPRuntime::createOffloadEntry( 3108 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3109 llvm::GlobalValue::LinkageTypes Linkage) { 3110 StringRef Name = Addr->getName(); 3111 llvm::Module &M = CGM.getModule(); 3112 llvm::LLVMContext &C = M.getContext(); 3113 3114 // Create constant string with the name. 
3115 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3116 3117 std::string StringName = getName({"omp_offloading", "entry_name"}); 3118 auto *Str = new llvm::GlobalVariable( 3119 M, StrPtrInit->getType(), /*isConstant=*/true, 3120 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3121 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3122 3123 llvm::Constant *Data[] = { 3124 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3125 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3126 llvm::ConstantInt::get(CGM.SizeTy, Size), 3127 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3128 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3129 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3130 llvm::GlobalVariable *Entry = createGlobalStruct( 3131 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3132 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3133 3134 // The entry has to be created in the section the linker expects it to be. 3135 Entry->setSection("omp_offloading_entries"); 3136 } 3137 3138 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3139 // Emit the offloading entries and metadata so that the device codegen side 3140 // can easily figure out what to emit. The produced metadata looks like 3141 // this: 3142 // 3143 // !omp_offload.info = !{!1, ...} 3144 // 3145 // Right now we only generate metadata for function that contain target 3146 // regions. 3147 3148 // If we are in simd mode or there are no entries, we don't need to do 3149 // anything. 
  // Nothing to emit in simd-only mode or when no offload entries were
  // registered.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected here indexed by their creation order so that the
  // final verification loop below sees them in a deterministic order.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the
        // SourceManager; stays invalid if no file matches.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variables carry no useful source location here.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Verify each collected entry and emit the actual offload entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device side, 'to' entries are
        // not emitted.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only emitted on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device compilation consumes host IR metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Decode each metadata tuple; operand 0 is the entry kind, the remaining
  // operand layout matches createOffloadEntriesAndInfoMetadata().
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function pointer type used by task
/// entry points: kmp_int32 (*)(kmp_int32, void *).
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t  size;      // Size of the entry info (0 if it is a function).
  //   int32_t flags;     // Flags associated with the entry, e.g. 'link'.
  //   int32_t reserved;  // Reserved, to be used by the runtime library.
3403 // }; 3404 if (TgtOffloadEntryQTy.isNull()) { 3405 ASTContext &C = CGM.getContext(); 3406 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3407 RD->startDefinition(); 3408 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3409 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3410 addFieldToRecordDecl(C, RD, C.getSizeType()); 3411 addFieldToRecordDecl( 3412 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3413 addFieldToRecordDecl( 3414 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3415 RD->completeDefinition(); 3416 RD->addAttr(PackedAttr::CreateImplicit(C)); 3417 TgtOffloadEntryQTy = C.getRecordType(RD); 3418 } 3419 return TgtOffloadEntryQTy; 3420 } 3421 3422 namespace { 3423 struct PrivateHelpersTy { 3424 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3425 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3426 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3427 PrivateElemInit(PrivateElemInit) {} 3428 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3429 const Expr *OriginalRef = nullptr; 3430 const VarDecl *Original = nullptr; 3431 const VarDecl *PrivateCopy = nullptr; 3432 const VarDecl *PrivateElemInit = nullptr; 3433 bool isLocalPrivate() const { 3434 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3435 } 3436 }; 3437 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3438 } // anonymous namespace 3439 3440 static bool isAllocatableDecl(const VarDecl *VD) { 3441 const VarDecl *CVD = VD->getCanonicalDecl(); 3442 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3443 return false; 3444 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3445 // Use the default allocation. 
3446 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3447 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3448 !AA->getAllocator()); 3449 } 3450 3451 static RecordDecl * 3452 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3453 if (!Privates.empty()) { 3454 ASTContext &C = CGM.getContext(); 3455 // Build struct .kmp_privates_t. { 3456 // /* private vars */ 3457 // }; 3458 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3459 RD->startDefinition(); 3460 for (const auto &Pair : Privates) { 3461 const VarDecl *VD = Pair.second.Original; 3462 QualType Type = VD->getType().getNonReferenceType(); 3463 // If the private variable is a local variable with lvalue ref type, 3464 // allocate the pointer instead of the pointee type. 3465 if (Pair.second.isLocalPrivate()) { 3466 if (VD->getType()->isLValueReferenceType()) 3467 Type = C.getPointerType(Type); 3468 if (isAllocatableDecl(VD)) 3469 Type = C.getPointerType(Type); 3470 } 3471 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3472 if (VD->hasAttrs()) { 3473 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3474 E(VD->getAttrs().end()); 3475 I != E; ++I) 3476 FD->addAttr(*I); 3477 } 3478 } 3479 RD->completeDefinition(); 3480 return RD; 3481 } 3482 return nullptr; 3483 } 3484 3485 static RecordDecl * 3486 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3487 QualType KmpInt32Ty, 3488 QualType KmpRoutineEntryPointerQTy) { 3489 ASTContext &C = CGM.getContext(); 3490 // Build struct kmp_task_t { 3491 // void * shareds; 3492 // kmp_routine_entry_t routine; 3493 // kmp_int32 part_id; 3494 // kmp_cmplrdata_t data1; 3495 // kmp_cmplrdata_t data2; 3496 // For taskloops additional fields: 3497 // kmp_uint64 lb; 3498 // kmp_uint64 ub; 3499 // kmp_int64 st; 3500 // kmp_int32 liter; 3501 // void * reductions; 3502 // }; 3503 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3504 
UD->startDefinition(); 3505 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3506 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3507 UD->completeDefinition(); 3508 QualType KmpCmplrdataTy = C.getRecordType(UD); 3509 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3510 RD->startDefinition(); 3511 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3512 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3513 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3514 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3515 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3516 if (isOpenMPTaskLoopDirective(Kind)) { 3517 QualType KmpUInt64Ty = 3518 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3519 QualType KmpInt64Ty = 3520 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3521 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3522 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3523 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3524 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3525 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3526 } 3527 RD->completeDefinition(); 3528 return RD; 3529 } 3530 3531 static RecordDecl * 3532 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3533 ArrayRef<PrivateDataTy> Privates) { 3534 ASTContext &C = CGM.getContext(); 3535 // Build struct kmp_task_t_with_privates { 3536 // kmp_task_t task_data; 3537 // .kmp_privates_t. privates; 3538 // }; 3539 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3540 RD->startDefinition(); 3541 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3542 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3543 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3544 RD->completeDefinition(); 3545 return RD; 3546 } 3547 3548 /// Emit a proxy function which accepts kmp_task_t as the second 3549 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature expected by the runtime: (kmp_int32 gtid, kmp_task_t *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field, if present, is the second field of
  // kmp_task_t_with_privates; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb, ub, st, liter and reductions.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor function: walks the fields of the privates
/// record and pushes a destroy cleanup for every field whose type has a
/// non-trivial destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its (1-based) position in Args, so the
  // field-emission loop below can find the matching out-parameter.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // Same pointer-wrapping rules as in createPrivatesRecordDecl.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This trivial forwarding function should always be inlined when
    // optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in parallel with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup) only non-trivial constructor initializers
    // need to be re-run; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value through SrcBase using the original
          // variable's declared alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init element to the
          // shared address and run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// Checks whether any non-local private copy of the task requires non-trivial
/// initialization, i.e. has an initializer that is a CXXConstructExpr and is
/// not recognized as trivial by CGF.isTrivialInitializer().
/// \return true as soon as one such private copy is found.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Compiler-generated local privates carry no user initializer.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(kmp_task_t_with_privates *dst,
  //                 kmp_task_t_with_privates *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  // Precondition: a task_dup function is only built when the task actually
  // carries private copies to initialize.
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are initialized from the *source* task's shareds, so load
    // the shareds pointer out of SrcArg (not DstArg).
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
/// NOTE(review): \p KmpTaskTWithPrivatesQTyRD is unused in this body —
/// presumably kept for signature symmetry with related helpers; confirm.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    // Local (compiler-internal) privates never need destruction here.
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
/// RAII scope that materializes the loop nest for an OpenMP 'iterator'
/// expression: the constructor privatizes the iterator/counter variables and
/// emits the loop headers (counter init, bound check, body entry, iterator
/// update); the destructor emits the latches (counter increment, back-branch)
/// and exit blocks, innermost loop first. Code emitted between construction
/// and destruction therefore runs once per iteration of the nest.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator, filled by the constructor and
  // consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // A null \p E makes the scope a no-op (both ctor and dtor return early).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs unsigned comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Computes the base address of \p E and the size in bytes it covers:
/// - array-shaping expression: element size times the product of all
///   dimension extents;
/// - array section: byte distance from the section's lower bound to one past
///   its upper bound;
/// - otherwise: sizeof(E's type).
/// \return {address, size-in-bytes} as llvm::Values.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(elem) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // size = (&section_end + 1) - &section_begin, in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and builds the flags type. Layout matches the runtime:
///   { intptr_t base_addr; size_t len; uint32 flags; }
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

// Emits the complete setup for an OpenMP task: gathers private copies,
// builds the task record types, allocates the task via the runtime, fills
// shareds/privates/flags/priority, and returns the pieces callers need.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Collect 'private' vars: (original expr, original decl, private copy,
  // no element initializer), keyed by the decl's alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Collect 'firstprivate' vars; these additionally carry an element-init
  // decl used to copy-construct from the original value.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Collect 'lastprivate' vars (no element initializer either).
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Compiler-generated local privates; allocatable ones are stored as
  // pointers, hence pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment (stable, so equal-alignment entries keep
  // source order) — presumably to minimize padding in the privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives use a distinct (cached) kmp_task_t layout from plain
  // task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // Build (or null out) the privates-mapping function whose pointer is passed
  // as the 4th argument of the outlined task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // DestructorsFlag tells the runtime to invoke the destructors thunk when
    // the task finishes.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a dynamic condition (pointer set) or a compile-time
  // constant (int part of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // nowait target tasks use the target-specific allocator entry point.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of iterator upper bounds); plain clauses contribute a
    // compile-time count of their list items.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = static count + iterator-derived count,
      // emitted as a VLA of kmp_task_affinity_info_t.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static case: a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      // Iterator-modified clauses are handled in a second pass below.
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // The second pass needs a runtime position counter since the number of
    // iterator-generated entries is not known statically.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The iterator scope re-emits the varlist items once per iteration of
      // the iterator loop nest.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Reinterpret the runtime's kmp_task_t* as our task-with-privates record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup thunk when there are
    // lastprivates or non-trivially-initialized privates.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // These kinds never reach runtime translation; they are handled (or
  // rejected) earlier.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// Layout matches the runtime: { intptr_t base_addr; size_t len; flags }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

// Returns {number-of-deps, lvalue-of-first-element} for a depobj. The element
// count is stashed by the runtime/codegen in the base_addr field of the
// element *preceding* the depobj's first kmp_depend_info entry (hence the
// GEP by -1 below).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Address of the header element at index -1 that stores the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills kmp_depend_info entries in \p DependenciesArray for each expression
/// in \p Data.DepExprs, starting at position \p Pos. \p Pos is either a
/// compile-time counter (unsigned*) or a runtime counter in memory (LValue*)
/// — the latter is required when an iterator modifier makes the number of
/// emitted entries unknown statically. The position is advanced per entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // No-op scope when there is no iterator modifier.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Emits, for each depobj expression in \p Data.DepExprs, a runtime value
/// holding the total number of kmp_depend_info elements that depobj expands
/// to (summed across iterator iterations when an iterator modifier is
/// present). Returns one size value per depobj expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the base_addr field of the header entry
      // at index -1 (same convention as getDepobjElements above).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temp; the accumulated total is
      // read back after the iterator scope closes.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load totals outside the iterator loops.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the kmp_depend_info entries of each depobj in \p Data.DepExprs into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
4801 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4802 Addr.getPointer(), 4803 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4804 LValue NumDepsBase = CGF.MakeAddrLValue( 4805 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4806 Base.getBaseInfo(), Base.getTBAAInfo()); 4807 // NumDeps = deps[i].base_addr; 4808 LValue BaseAddrLVal = CGF.EmitLValueForField( 4809 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4810 llvm::Value *NumDeps = 4811 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4812 4813 // memcopy dependency data. 4814 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4815 ElSize, 4816 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4817 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4818 Address DepAddr = 4819 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4820 DependenciesArray.getAlignment()); 4821 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4822 4823 // Increase pos. 4824 // pos += size; 4825 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4826 CGF.EmitStoreOfScalar(Add, PosLVal); 4827 } 4828 } 4829 } 4830 4831 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4832 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4833 SourceLocation Loc) { 4834 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4835 return D.DepExprs.empty(); 4836 })) 4837 return std::make_pair(nullptr, Address::invalid()); 4838 // Process list of dependencies. 4839 ASTContext &C = CGM.getContext(); 4840 Address DependenciesArray = Address::invalid(); 4841 llvm::Value *NumOfElements = nullptr; 4842 unsigned NumDependencies = std::accumulate( 4843 Dependencies.begin(), Dependencies.end(), 0, 4844 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4845 return D.DepKind == OMPC_DEPEND_depobj 4846 ? V 4847 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4848 }); 4849 QualType FlagsTy; 4850 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4851 bool HasDepobjDeps = false; 4852 bool HasRegularWithIterators = false; 4853 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4854 llvm::Value *NumOfRegularWithIterators = 4855 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4856 // Calculate number of depobj dependecies and regular deps with the iterators. 4857 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4858 if (D.DepKind == OMPC_DEPEND_depobj) { 4859 SmallVector<llvm::Value *, 4> Sizes = 4860 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4861 for (llvm::Value *Size : Sizes) { 4862 NumOfDepobjElements = 4863 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4864 } 4865 HasDepobjDeps = true; 4866 continue; 4867 } 4868 // Include number of iterations, if any. 4869 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4870 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4871 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4872 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4873 NumOfRegularWithIterators = 4874 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4875 } 4876 HasRegularWithIterators = true; 4877 continue; 4878 } 4879 } 4880 4881 QualType KmpDependInfoArrayTy; 4882 if (HasDepobjDeps || HasRegularWithIterators) { 4883 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4884 /*isSigned=*/false); 4885 if (HasDepobjDeps) { 4886 NumOfElements = 4887 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4888 } 4889 if (HasRegularWithIterators) { 4890 NumOfElements = 4891 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4892 } 4893 OpaqueValueExpr OVE(Loc, 4894 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4895 VK_PRValue); 4896 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4897 RValue::get(NumOfElements)); 4898 
KmpDependInfoArrayTy = 4899 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4900 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4901 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4902 // Properly emit variable-sized array. 4903 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4904 ImplicitParamDecl::Other); 4905 CGF.EmitVarDecl(*PD); 4906 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4907 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4908 /*isSigned=*/false); 4909 } else { 4910 KmpDependInfoArrayTy = C.getConstantArrayType( 4911 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4912 ArrayType::Normal, /*IndexTypeQuals=*/0); 4913 DependenciesArray = 4914 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4915 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4916 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4917 /*isSigned=*/false); 4918 } 4919 unsigned Pos = 0; 4920 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4921 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4922 Dependencies[I].IteratorExpr) 4923 continue; 4924 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4925 DependenciesArray); 4926 } 4927 // Copy regular dependecies with iterators. 4928 LValue PosLVal = CGF.MakeAddrLValue( 4929 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4930 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4931 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4932 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4933 !Dependencies[I].IteratorExpr) 4934 continue; 4935 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4936 DependenciesArray); 4937 } 4938 // Copy final depobj arrays without iterators. 
4939 if (HasDepobjDeps) { 4940 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4941 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4942 continue; 4943 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4944 DependenciesArray); 4945 } 4946 } 4947 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4948 DependenciesArray, CGF.VoidPtrTy); 4949 return std::make_pair(NumOfElements, DependenciesArray); 4950 } 4951 4952 Address CGOpenMPRuntime::emitDepobjDependClause( 4953 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4954 SourceLocation Loc) { 4955 if (Dependencies.DepExprs.empty()) 4956 return Address::invalid(); 4957 // Process list of dependencies. 4958 ASTContext &C = CGM.getContext(); 4959 Address DependenciesArray = Address::invalid(); 4960 unsigned NumDependencies = Dependencies.DepExprs.size(); 4961 QualType FlagsTy; 4962 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4963 RecordDecl *KmpDependInfoRD = 4964 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4965 4966 llvm::Value *Size; 4967 // Define type kmp_depend_info[<Dependencies.size()>]; 4968 // For depobj reserve one extra element to store the number of elements. 4969 // It is required to handle depobj(x) update(in) construct. 
4970 // kmp_depend_info[<Dependencies.size()>] deps; 4971 llvm::Value *NumDepsVal; 4972 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4973 if (const auto *IE = 4974 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4975 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4976 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4977 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4978 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4979 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4980 } 4981 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4982 NumDepsVal); 4983 CharUnits SizeInBytes = 4984 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4985 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4986 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4987 NumDepsVal = 4988 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4989 } else { 4990 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4991 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4992 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4993 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4994 Size = CGM.getSize(Sz.alignTo(Align)); 4995 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4996 } 4997 // Need to allocate on the dynamic memory. 4998 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4999 // Use default allocator. 
5000 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5001 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5002 5003 llvm::Value *Addr = 5004 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5005 CGM.getModule(), OMPRTL___kmpc_alloc), 5006 Args, ".dep.arr.addr"); 5007 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5008 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5009 DependenciesArray = Address(Addr, Align); 5010 // Write number of elements in the first element of array for depobj. 5011 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5012 // deps[i].base_addr = NumDependencies; 5013 LValue BaseAddrLVal = CGF.EmitLValueForField( 5014 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5015 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5016 llvm::PointerUnion<unsigned *, LValue *> Pos; 5017 unsigned Idx = 1; 5018 LValue PosLVal; 5019 if (Dependencies.IteratorExpr) { 5020 PosLVal = CGF.MakeAddrLValue( 5021 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5022 C.getSizeType()); 5023 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5024 /*IsInit=*/true); 5025 Pos = &PosLVal; 5026 } else { 5027 Pos = &Idx; 5028 } 5029 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5030 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5031 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5032 return DependenciesArray; 5033 } 5034 5035 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5036 SourceLocation Loc) { 5037 ASTContext &C = CGM.getContext(); 5038 QualType FlagsTy; 5039 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5040 LValue Base = CGF.EmitLoadOfPointerLValue( 5041 DepobjLVal.getAddress(CGF), 5042 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5043 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5044 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5045 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5046 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5047 Addr.getPointer(), 5048 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5049 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5050 CGF.VoidPtrTy); 5051 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5052 // Use default allocator. 5053 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5054 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5055 5056 // _kmpc_free(gtid, addr, nullptr); 5057 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5058 CGM.getModule(), OMPRTL___kmpc_free), 5059 Args); 5060 } 5061 5062 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5063 OpenMPDependClauseKind NewDepKind, 5064 SourceLocation Loc) { 5065 ASTContext &C = CGM.getContext(); 5066 QualType FlagsTy; 5067 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5068 RecordDecl *KmpDependInfoRD = 5069 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5070 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5071 llvm::Value *NumDeps; 5072 LValue Base; 5073 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5074 5075 Address Begin = Base.getAddress(CGF); 5076 // Cast from pointer to array type to pointer to single element. 5077 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5078 // The basic structure here is a while-do loop. 
5079 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5080 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5081 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5082 CGF.EmitBlock(BodyBB); 5083 llvm::PHINode *ElementPHI = 5084 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5085 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5086 Begin = Address(ElementPHI, Begin.getAlignment()); 5087 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5088 Base.getTBAAInfo()); 5089 // deps[i].flags = NewDepKind; 5090 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5091 LValue FlagsLVal = CGF.EmitLValueForField( 5092 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5093 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5094 FlagsLVal); 5095 5096 // Shift the address forward by one element. 5097 Address ElementNext = 5098 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5099 ElementPHI->addIncoming(ElementNext.getPointer(), 5100 CGF.Builder.GetInsertBlock()); 5101 llvm::Value *IsEmpty = 5102 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5103 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5104 // Done. 
5105 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5106 } 5107 5108 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5109 const OMPExecutableDirective &D, 5110 llvm::Function *TaskFunction, 5111 QualType SharedsTy, Address Shareds, 5112 const Expr *IfCond, 5113 const OMPTaskDataTy &Data) { 5114 if (!CGF.HaveInsertPoint()) 5115 return; 5116 5117 TaskResultTy Result = 5118 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5119 llvm::Value *NewTask = Result.NewTask; 5120 llvm::Function *TaskEntry = Result.TaskEntry; 5121 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5122 LValue TDBase = Result.TDBase; 5123 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5124 // Process list of dependences. 5125 Address DependenciesArray = Address::invalid(); 5126 llvm::Value *NumOfElements; 5127 std::tie(NumOfElements, DependenciesArray) = 5128 emitDependClause(CGF, Data.Dependences, Loc); 5129 5130 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5131 // libcall. 
5132 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5133 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5134 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5135 // list is not empty 5136 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5137 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5138 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5139 llvm::Value *DepTaskArgs[7]; 5140 if (!Data.Dependences.empty()) { 5141 DepTaskArgs[0] = UpLoc; 5142 DepTaskArgs[1] = ThreadID; 5143 DepTaskArgs[2] = NewTask; 5144 DepTaskArgs[3] = NumOfElements; 5145 DepTaskArgs[4] = DependenciesArray.getPointer(); 5146 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5147 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5148 } 5149 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5150 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5151 if (!Data.Tied) { 5152 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5153 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5154 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5155 } 5156 if (!Data.Dependences.empty()) { 5157 CGF.EmitRuntimeCall( 5158 OMPBuilder.getOrCreateRuntimeFunction( 5159 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5160 DepTaskArgs); 5161 } else { 5162 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5163 CGM.getModule(), OMPRTL___kmpc_omp_task), 5164 TaskArgs); 5165 } 5166 // Check if parent region is untied and build return for untied task; 5167 if (auto *Region = 5168 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5169 Region->emitUntiedSwitch(CGF); 5170 }; 5171 5172 llvm::Value *DepWaitTaskArgs[6]; 5173 if (!Data.Dependences.empty()) { 5174 DepWaitTaskArgs[0] = UpLoc; 5175 DepWaitTaskArgs[1] = ThreadID; 5176 DepWaitTaskArgs[2] = NumOfElements; 5177 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5178 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5179 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5180 } 5181 auto &M = CGM.getModule(); 5182 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5183 TaskEntry, &Data, &DepWaitTaskArgs, 5184 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5185 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5186 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5187 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5188 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5189 // is specified. 5190 if (!Data.Dependences.empty()) 5191 CGF.EmitRuntimeCall( 5192 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5193 DepWaitTaskArgs); 5194 // Call proxy_task_entry(gtid, new_task); 5195 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5196 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5197 Action.Enter(CGF); 5198 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5199 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5200 OutlinedFnArgs); 5201 }; 5202 5203 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5204 // kmp_task_t *new_task); 5205 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5206 // kmp_task_t *new_task); 5207 RegionCodeGenTy RCG(CodeGen); 5208 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5209 M, OMPRTL___kmpc_omp_task_begin_if0), 5210 TaskArgs, 5211 OMPBuilder.getOrCreateRuntimeFunction( 5212 M, OMPRTL___kmpc_omp_task_complete_if0), 5213 TaskArgs); 5214 RCG.setAction(Action); 5215 RCG(CGF); 5216 }; 5217 5218 if (IfCond) { 5219 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5220 } else { 5221 RegionCodeGenTy ThenRCG(ThenCodeGen); 5222 ThenRCG(CGF); 5223 } 5224 } 5225 5226 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5227 const OMPLoopDirective &D, 5228 llvm::Function *TaskFunction, 5229 
QualType SharedsTy, Address Shareds, 5230 const Expr *IfCond, 5231 const OMPTaskDataTy &Data) { 5232 if (!CGF.HaveInsertPoint()) 5233 return; 5234 TaskResultTy Result = 5235 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5236 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5237 // libcall. 5238 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5239 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5240 // sched, kmp_uint64 grainsize, void *task_dup); 5241 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5242 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5243 llvm::Value *IfVal; 5244 if (IfCond) { 5245 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5246 /*isSigned=*/true); 5247 } else { 5248 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5249 } 5250 5251 LValue LBLVal = CGF.EmitLValueForField( 5252 Result.TDBase, 5253 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5254 const auto *LBVar = 5255 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5256 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5257 LBLVal.getQuals(), 5258 /*IsInitializer=*/true); 5259 LValue UBLVal = CGF.EmitLValueForField( 5260 Result.TDBase, 5261 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5262 const auto *UBVar = 5263 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5264 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5265 UBLVal.getQuals(), 5266 /*IsInitializer=*/true); 5267 LValue StLVal = CGF.EmitLValueForField( 5268 Result.TDBase, 5269 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5270 const auto *StVar = 5271 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5272 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5273 StLVal.getQuals(), 5274 /*IsInitializer=*/true); 5275 // 
Store reductions address. 5276 LValue RedLVal = CGF.EmitLValueForField( 5277 Result.TDBase, 5278 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5279 if (Data.Reductions) { 5280 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5281 } else { 5282 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5283 CGF.getContext().VoidPtrTy); 5284 } 5285 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5286 llvm::Value *TaskArgs[] = { 5287 UpLoc, 5288 ThreadID, 5289 Result.NewTask, 5290 IfVal, 5291 LBLVal.getPointer(CGF), 5292 UBLVal.getPointer(CGF), 5293 CGF.EmitLoadOfScalar(StLVal, Loc), 5294 llvm::ConstantInt::getSigned( 5295 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5296 llvm::ConstantInt::getSigned( 5297 CGF.IntTy, Data.Schedule.getPointer() 5298 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5299 : NoSchedule), 5300 Data.Schedule.getPointer() 5301 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5302 /*isSigned=*/false) 5303 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5304 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5305 Result.TaskDupFn, CGF.VoidPtrTy) 5306 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5307 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5308 CGM.getModule(), OMPRTL___kmpc_taskloop), 5309 TaskArgs); 5310 } 5311 5312 /// Emit reduction operation for each element of array (required for 5313 /// array sections) LHS op = RHS. 5314 /// \param Type Type of array. 5315 /// \param LHSVar Variable on the left side of the reduction operation 5316 /// (references element of array in original variable). 5317 /// \param RHSVar Variable on the right side of the reduction operation 5318 /// (references element of array in original variable). 5319 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5320 /// RHSVar. 
5321 static void EmitOMPAggregateReduction( 5322 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5323 const VarDecl *RHSVar, 5324 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5325 const Expr *, const Expr *)> &RedOpGen, 5326 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5327 const Expr *UpExpr = nullptr) { 5328 // Perform element-by-element initialization. 5329 QualType ElementTy; 5330 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5331 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5332 5333 // Drill down to the base element type on both arrays. 5334 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5335 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5336 5337 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5338 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5339 // Cast from pointer to array type to pointer to single element. 5340 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5341 // The basic structure here is a while-do loop. 5342 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5343 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5344 llvm::Value *IsEmpty = 5345 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5346 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5347 5348 // Enter the loop body, making that address the current address. 
5349 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5350 CGF.EmitBlock(BodyBB); 5351 5352 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5353 5354 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5355 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5356 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5357 Address RHSElementCurrent = 5358 Address(RHSElementPHI, 5359 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5360 5361 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5362 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5363 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5364 Address LHSElementCurrent = 5365 Address(LHSElementPHI, 5366 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5367 5368 // Emit copy. 5369 CodeGenFunction::OMPPrivateScope Scope(CGF); 5370 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5371 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5372 Scope.Privatize(); 5373 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5374 Scope.ForceCleanup(); 5375 5376 // Shift the address forward by one element. 5377 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5378 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5379 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5380 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5381 // Check whether we've reached the end. 5382 llvm::Value *Done = 5383 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5384 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5385 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5386 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5387 5388 // Done. 5389 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5390 } 5391 5392 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5393 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5394 /// UDR combiner function. 5395 static void emitReductionCombiner(CodeGenFunction &CGF, 5396 const Expr *ReductionOp) { 5397 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5398 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5399 if (const auto *DRE = 5400 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5401 if (const auto *DRD = 5402 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5403 std::pair<llvm::Function *, llvm::Function *> Reduction = 5404 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5405 RValue Func = RValue::get(Reduction.first); 5406 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5407 CGF.EmitIgnoredExpr(ReductionOp); 5408 return; 5409 } 5410 CGF.EmitIgnoredExpr(ReductionOp); 5411 } 5412 5413 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5414 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5415 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5416 ArrayRef<const Expr *> ReductionOps) { 5417 ASTContext &C = CGM.getContext(); 5418 5419 // void reduction_func(void *LHSArg, void *RHSArg); 5420 FunctionArgList Args; 5421 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5422 ImplicitParamDecl::Other); 5423 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5424 ImplicitParamDecl::Other); 5425 Args.push_back(&LHSArg); 5426 Args.push_back(&RHSArg); 5427 const auto &CGFI = 5428 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5429 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5430 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5431 llvm::GlobalValue::InternalLinkage, Name, 5432 &CGM.getModule()); 5433 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5434 Fn->setDoesNotRecurse(); 
5435 CodeGenFunction CGF(CGM); 5436 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5437 5438 // Dst = (void*[n])(LHSArg); 5439 // Src = (void*[n])(RHSArg); 5440 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5441 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5442 ArgsType), CGF.getPointerAlign()); 5443 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5444 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5445 ArgsType), CGF.getPointerAlign()); 5446 5447 // ... 5448 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5449 // ... 5450 CodeGenFunction::OMPPrivateScope Scope(CGF); 5451 auto IPriv = Privates.begin(); 5452 unsigned Idx = 0; 5453 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5454 const auto *RHSVar = 5455 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5456 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5457 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5458 }); 5459 const auto *LHSVar = 5460 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5461 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5462 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5463 }); 5464 QualType PrivTy = (*IPriv)->getType(); 5465 if (PrivTy->isVariablyModifiedType()) { 5466 // Get array size and emit VLA type. 
5467 ++Idx; 5468 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5469 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5470 const VariableArrayType *VLA = 5471 CGF.getContext().getAsVariableArrayType(PrivTy); 5472 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5473 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5474 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5475 CGF.EmitVariablyModifiedType(PrivTy); 5476 } 5477 } 5478 Scope.Privatize(); 5479 IPriv = Privates.begin(); 5480 auto ILHS = LHSExprs.begin(); 5481 auto IRHS = RHSExprs.begin(); 5482 for (const Expr *E : ReductionOps) { 5483 if ((*IPriv)->getType()->isArrayType()) { 5484 // Emit reduction for array section. 5485 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5486 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5487 EmitOMPAggregateReduction( 5488 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5489 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5490 emitReductionCombiner(CGF, E); 5491 }); 5492 } else { 5493 // Emit reduction for array subscript or single variable. 5494 emitReductionCombiner(CGF, E); 5495 } 5496 ++IPriv; 5497 ++ILHS; 5498 ++IRHS; 5499 } 5500 Scope.ForceCleanup(); 5501 CGF.FinishFunction(); 5502 return Fn; 5503 } 5504 5505 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5506 const Expr *ReductionOp, 5507 const Expr *PrivateRef, 5508 const DeclRefExpr *LHS, 5509 const DeclRefExpr *RHS) { 5510 if (PrivateRef->getType()->isArrayType()) { 5511 // Emit reduction for array section. 
5512 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5513 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5514 EmitOMPAggregateReduction( 5515 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5516 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5517 emitReductionCombiner(CGF, ReductionOp); 5518 }); 5519 } else { 5520 // Emit reduction for array subscript or single variable. 5521 emitReductionCombiner(CGF, ReductionOp); 5522 } 5523 } 5524 5525 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5526 ArrayRef<const Expr *> Privates, 5527 ArrayRef<const Expr *> LHSExprs, 5528 ArrayRef<const Expr *> RHSExprs, 5529 ArrayRef<const Expr *> ReductionOps, 5530 ReductionOptionsTy Options) { 5531 if (!CGF.HaveInsertPoint()) 5532 return; 5533 5534 bool WithNowait = Options.WithNowait; 5535 bool SimpleReduction = Options.SimpleReduction; 5536 5537 // Next code should be emitted for reduction: 5538 // 5539 // static kmp_critical_name lock = { 0 }; 5540 // 5541 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5542 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5543 // ... 5544 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5545 // *(Type<n>-1*)rhs[<n>-1]); 5546 // } 5547 // 5548 // ... 5549 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5550 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5551 // RedList, reduce_func, &<lock>)) { 5552 // case 1: 5553 // ... 5554 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5555 // ... 5556 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5557 // break; 5558 // case 2: 5559 // ... 5560 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5561 // ... 5562 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5563 // break; 5564 // default:; 5565 // } 5566 // 5567 // if SimpleReduction is true, only the next code is generated: 5568 // ... 
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just emit the combiner for each item.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the extra slot reserved above, smuggled through
      // the void* list as an inttoptr value.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match an assignment 'x = <update>' so it can be emitted as a
      // simple atomic update below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // For the fallback path the LHS variable is privatized to a
                // temp holding the previously loaded value of 'x'.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No original item is needed: pass a null pointer lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits the kmp_taskred_input_t descriptor array for task reductions and the
/// call to __kmpc_taskred_init / __kmpc_taskred_modifier_init that registers
/// it with the runtime. Returns nullptr when there are no reduction vars or
/// no insert point.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (or 1 when creation/initialization is delayed for
    // VLAs/array sections, see DelayedCreation above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    // Store the dynamic size into the artificial threadprivate variable keyed
    // by the reduction item, so init/comb/fini helpers can read it back.
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate to the OpenMPIRBuilder when it is enabled.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants passed to the __kmpc_cancel* entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancellation region kind to the runtime's RTCancelKind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
6300 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6301 llvm::Value *Args[] = { 6302 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6303 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6304 // Ignore return result until untied tasks are supported. 6305 llvm::Value *Result = CGF.EmitRuntimeCall( 6306 OMPBuilder.getOrCreateRuntimeFunction( 6307 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6308 Args); 6309 // if (__kmpc_cancellationpoint()) { 6310 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6311 // exit from construct; 6312 // } 6313 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6314 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6315 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6316 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6317 CGF.EmitBlock(ExitBB); 6318 if (CancelRegion == OMPD_parallel) 6319 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6320 // exit from construct; 6321 CodeGenFunction::JumpDest CancelDest = 6322 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6323 CGF.EmitBranchThroughCleanup(CancelDest); 6324 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6325 } 6326 } 6327 } 6328 6329 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6330 const Expr *IfCond, 6331 OpenMPDirectiveKind CancelRegion) { 6332 if (!CGF.HaveInsertPoint()) 6333 return; 6334 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6335 // kmp_int32 cncl_kind); 6336 auto &M = CGM.getModule(); 6337 if (auto *OMPRegionInfo = 6338 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6339 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6340 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6341 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6342 llvm::Value *Args[] = { 6343 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6344 
CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6345 // Ignore return result until untied tasks are supported. 6346 llvm::Value *Result = CGF.EmitRuntimeCall( 6347 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6348 // if (__kmpc_cancel()) { 6349 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6350 // exit from construct; 6351 // } 6352 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6353 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6354 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6355 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6356 CGF.EmitBlock(ExitBB); 6357 if (CancelRegion == OMPD_parallel) 6358 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6359 // exit from construct; 6360 CodeGenFunction::JumpDest CancelDest = 6361 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6362 CGF.EmitBranchThroughCleanup(CancelDest); 6363 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6364 }; 6365 if (IfCond) { 6366 emitIfClause(CGF, IfCond, ThenGen, 6367 [](CodeGenFunction &, PrePostActionTy &) {}); 6368 } else { 6369 RegionCodeGenTy ThenRCG(ThenGen); 6370 ThenRCG(CGF); 6371 } 6372 } 6373 } 6374 6375 namespace { 6376 /// Cleanup action for uses_allocators support. 
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator, allocator-traits) pairs collected from the
  /// 'uses_allocators' clause; a null traits expr was filtered out earlier.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// On region entry, initialize every user-defined allocator.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// On region exit, destroy the allocators initialized in Enter().
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emit the outlined function for target directive \p D, wrapping the region
/// body with uses_allocators init/fini actions when the directive carries a
/// 'uses_allocators' clause with traits.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE(review): this local 'D' shadows the directive parameter 'D'.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Allocators without traits need no runtime initialization here.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emit a call to __kmpc_init_allocator for \p Allocator with the traits
/// array \p AllocatorTraits, then store the returned handle into the
/// allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements in the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit a call to __kmpc_destroy_allocator for the handle currently stored in
/// \p Allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // NOTE(review): second constructor argument presumably suppresses creating
  // a new codegen context for the outlined function — confirm against the
  // CodeGenFunction constructor.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a unique dummy global, not the function address.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Peel compound statements, trivial expressions, and ignorable statements
/// and declarations off \p Body and return the single remaining "meaningful"
/// child statement, or nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations with no runtime effect are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals do not make the statement
              // "meaningful" either.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a closely nested teams construct and take its
    // num_teams clause if present.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: 0 (no explicit count).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: a single team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Derive the number of threads for a parallel region closely nested in the
/// captured statement \p CS, clamping the num_threads value by
/// \p DefaultThreadLimitVal (unsigned minimum) when the latter is non-null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an unmodified 'if' or one with the 'parallel' name modifier
        // applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Materialize the clause's pre-init declarations before
            // evaluating the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the enclosing thread limit: unsigned min via icmp+select.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': descend into nested constructs looking for
    // thread_limit / num_threads information.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Materialize the clause's pre-init declarations first.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step inside a (non-distribute) teams construct to inspect its child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false 'if': serialized region, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: unsigned min of num_threads and thread_limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
7086 OMP_MAP_PRIVATE = 0x80, 7087 /// Pass the element to the device by value. 7088 OMP_MAP_LITERAL = 0x100, 7089 /// Implicit map 7090 OMP_MAP_IMPLICIT = 0x200, 7091 /// Close is a hint to the runtime to allocate memory close to 7092 /// the target device. 7093 OMP_MAP_CLOSE = 0x400, 7094 /// 0x800 is reserved for compatibility with XLC. 7095 /// Produce a runtime error if the data is not already allocated. 7096 OMP_MAP_PRESENT = 0x1000, 7097 /// Signal that the runtime library should use args as an array of 7098 /// descriptor_dim pointers and use args_size as dims. Used when we have 7099 /// non-contiguous list items in target update directive 7100 OMP_MAP_NON_CONTIG = 0x100000000000, 7101 /// The 16 MSBs of the flags indicate whether the entry is member of some 7102 /// struct/class. 7103 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7104 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7105 }; 7106 7107 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7108 static unsigned getFlagMemberOffset() { 7109 unsigned Offset = 0; 7110 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7111 Remain = Remain >> 1) 7112 Offset++; 7113 return Offset; 7114 } 7115 7116 /// Class that holds debugging information for a data mapping to be passed to 7117 /// the runtime library. 7118 class MappingExprInfo { 7119 /// The variable declaration used for the data mapping. 7120 const ValueDecl *MapDecl = nullptr; 7121 /// The original expression used in the map clause, or null if there is 7122 /// none. 7123 const Expr *MapExpr = nullptr; 7124 7125 public: 7126 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7127 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7128 7129 const ValueDecl *getMapDecl() const { return MapDecl; } 7130 const Expr *getMapExpr() const { return MapExpr; } 7131 }; 7132 7133 /// Class that associates information with a base pointer to be passed to the 7134 /// runtime library. 
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing yields the wrapped base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    /// Dimension, offset, count and stride arrays describing non-contiguous
    /// list items (see OMP_MAP_NON_CONTIG).
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    /// Expression components this map entry was built from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    /// User-defined mapper associated with this map, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original map-clause expression, if any.
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the size in bytes (as an llvm::Value) of the entity mapped by
  /// \p E. Handles array shaping expressions, array sections (whole-array,
  /// single-element, "[lb:len]" and "[lb:]" forms) and plain typed
  /// expressions.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(element) * dim0 * dim1 * ...
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = len * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Select 0 if lb*elemsize is not smaller than the total size, so the
      // subtraction can never wrap below zero.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
Add 7351 /// a flag marking the map as a pointer if requested. Add a flag marking the 7352 /// map as the first one of a series of maps that relate to the same map 7353 /// expression. 7354 OpenMPOffloadMappingFlags getMapTypeBits( 7355 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7356 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7357 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7358 OpenMPOffloadMappingFlags Bits = 7359 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7360 switch (MapType) { 7361 case OMPC_MAP_alloc: 7362 case OMPC_MAP_release: 7363 // alloc and release is the default behavior in the runtime library, i.e. 7364 // if we don't pass any bits alloc/release that is what the runtime is 7365 // going to do. Therefore, we don't need to signal anything for these two 7366 // type modifiers. 7367 break; 7368 case OMPC_MAP_to: 7369 Bits |= OMP_MAP_TO; 7370 break; 7371 case OMPC_MAP_from: 7372 Bits |= OMP_MAP_FROM; 7373 break; 7374 case OMPC_MAP_tofrom: 7375 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7376 break; 7377 case OMPC_MAP_delete: 7378 Bits |= OMP_MAP_DELETE; 7379 break; 7380 case OMPC_MAP_unknown: 7381 llvm_unreachable("Unexpected map type!"); 7382 } 7383 if (AddPtrFlag) 7384 Bits |= OMP_MAP_PTR_AND_OBJ; 7385 if (AddIsTargetParamFlag) 7386 Bits |= OMP_MAP_TARGET_PARAM; 7387 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7388 != MapModifiers.end()) 7389 Bits |= OMP_MAP_ALWAYS; 7390 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7391 != MapModifiers.end()) 7392 Bits |= OMP_MAP_CLOSE; 7393 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7394 MapModifiers.end() || 7395 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7396 MotionModifiers.end()) 7397 Bits |= OMP_MAP_PRESENT; 7398 if (IsNonContiguous) 7399 Bits |= OMP_MAP_NON_CONTIG; 7400 return Bits; 7401 } 7402 7403 /// Return true if the provided expression is a final array section. 
A 7404 /// final array section, is one whose length can't be proved to be one. 7405 bool isFinalArraySectionExpression(const Expr *E) const { 7406 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7407 7408 // It is not an array section and therefore not a unity-size one. 7409 if (!OASE) 7410 return false; 7411 7412 // An array section with no colon always refer to a single element. 7413 if (OASE->getColonLocFirst().isInvalid()) 7414 return false; 7415 7416 const Expr *Length = OASE->getLength(); 7417 7418 // If we don't have a length we have to check if the array has size 1 7419 // for this dimension. Also, we should always expect a length if the 7420 // base type is pointer. 7421 if (!Length) { 7422 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7423 OASE->getBase()->IgnoreParenImpCasts()) 7424 .getCanonicalType(); 7425 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7426 return ATy->getSize().getSExtValue() != 1; 7427 // If we don't have a constant dimension length, we have to consider 7428 // the current section as having any size, so it is not necessarily 7429 // unitary. If it happen to be unity size, that's user fault. 7430 return true; 7431 } 7432 7433 // Check if the length evaluates to 1. 7434 Expr::EvalResult Result; 7435 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7436 return true; // Can have more that size 1. 7437 7438 llvm::APSInt ConstLength = Result.Val.getInt(); 7439 return ConstLength.getSExtValue() != 1; 7440 } 7441 7442 /// Generate the base pointers, section pointers, sizes, map type bits, and 7443 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7444 /// map type, map or motion modifiers, and expression components. 7445 /// \a IsFirstComponent should be set to true if the provided set of 7446 /// components is the first associated with a capture. 
7447 void generateInfoForComponentList( 7448 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7449 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7450 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7451 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7452 bool IsFirstComponentList, bool IsImplicit, 7453 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7454 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7455 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7456 OverlappedElements = llvm::None) const { 7457 // The following summarizes what has to be generated for each map and the 7458 // types below. The generated information is expressed in this order: 7459 // base pointer, section pointer, size, flags 7460 // (to add to the ones that come from the map type and modifier). 7461 // 7462 // double d; 7463 // int i[100]; 7464 // float *p; 7465 // 7466 // struct S1 { 7467 // int i; 7468 // float f[50]; 7469 // } 7470 // struct S2 { 7471 // int i; 7472 // float f[50]; 7473 // S1 s; 7474 // double *p; 7475 // struct S2 *ps; 7476 // int &ref; 7477 // } 7478 // S2 s; 7479 // S2 *ps; 7480 // 7481 // map(d) 7482 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7483 // 7484 // map(i) 7485 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7486 // 7487 // map(i[1:23]) 7488 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7489 // 7490 // map(p) 7491 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7492 // 7493 // map(p[1:24]) 7494 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7495 // in unified shared memory mode or for local pointers 7496 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7497 // 7498 // map(s) 7499 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7500 // 7501 // map(s.i) 7502 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7503 // 7504 // map(s.s.f) 7505 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7506 // 7507 // map(s.p) 7508 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7509 // 7510 // map(to: s.p[:22]) 7511 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7512 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7513 // &(s.p), &(s.p[0]), 22*sizeof(double), 7514 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7515 // (*) alloc space for struct members, only this is a target parameter 7516 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7517 // optimizes this entry out, same in the examples below) 7518 // (***) map the pointee (map: to) 7519 // 7520 // map(to: s.ref) 7521 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7522 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7523 // (*) alloc space for struct members, only this is a target parameter 7524 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7525 // optimizes this entry out, same in the examples below) 7526 // (***) map the pointee (map: to) 7527 // 7528 // map(s.ps) 7529 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7530 // 7531 // map(from: s.ps->s.i) 7532 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7533 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7534 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7535 // 7536 // map(to: s.ps->ps) 7537 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7538 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7539 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7540 // 7541 // map(s.ps->ps->ps) 7542 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7543 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7544 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7545 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7546 // 7547 // map(to: s.ps->ps->s.f[:22]) 7548 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7549 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7550 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7551 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7552 // 7553 // map(ps) 7554 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7555 // 7556 // map(ps->i) 7557 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7558 // 7559 // map(ps->s.f) 7560 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7561 // 7562 // map(from: ps->p) 7563 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7564 // 7565 // map(to: ps->p[:22]) 7566 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7567 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7568 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7569 // 7570 // map(ps->ps) 7571 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7572 // 7573 // map(from: ps->ps->s.i) 7574 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7575 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7576 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7577 // 7578 // map(from: ps->ps->ps) 7579 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7580 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7581 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7582 // 7583 // map(ps->ps->ps->ps) 7584 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7585 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7586 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7587 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7588 // 7589 // map(to: ps->ps->ps->s.f[:22]) 7590 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7591 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7592 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7593 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7594 // 7595 // map(to: s.f[:22]) map(from: s.p[:33]) 7596 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7597 // sizeof(double*) (**), TARGET_PARAM 7598 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7599 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7600 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7601 // (*) allocate contiguous space needed to fit all mapped members even if 7602 // we allocate space for members not mapped (in this example, 7603 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7604 // them as well because they fall between &s.f[0] and &s.p) 7605 // 7606 // map(from: s.f[:22]) map(to: ps->p[:33]) 7607 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7608 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7609 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7610 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7611 // (*) the struct this entry pertains to is the 2nd element in the list of 7612 // arguments, hence MEMBER_OF(2) 7613 // 7614 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7615 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7616 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7617 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7618 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7619 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7620 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7621 // (*) the struct this entry pertains to is the 4th element in the list 7622 // of arguments, hence MEMBER_OF(4) 7623 7624 // Track if the map information being generated is the first for a capture. 7625 bool IsCaptureFirstInfo = IsFirstComponentList; 7626 // When the variable is on a declare target link or in a to clause with 7627 // unified memory, a reference is needed to hold the host/device address 7628 // of the variable. 7629 bool RequiresReference = false; 7630 7631 // Scan the components from the base to the complete expression. 
7632 auto CI = Components.rbegin(); 7633 auto CE = Components.rend(); 7634 auto I = CI; 7635 7636 // Track if the map information being generated is the first for a list of 7637 // components. 7638 bool IsExpressionFirstInfo = true; 7639 bool FirstPointerInComplexData = false; 7640 Address BP = Address::invalid(); 7641 const Expr *AssocExpr = I->getAssociatedExpression(); 7642 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7643 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7644 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7645 7646 if (isa<MemberExpr>(AssocExpr)) { 7647 // The base is the 'this' pointer. The content of the pointer is going 7648 // to be the base of the field being mapped. 7649 BP = CGF.LoadCXXThisAddress(); 7650 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7651 (OASE && 7652 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7653 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7654 } else if (OAShE && 7655 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7656 BP = Address( 7657 CGF.EmitScalarExpr(OAShE->getBase()), 7658 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7659 } else { 7660 // The base is the reference to the variable. 7661 // BP = &Var. 7662 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7663 if (const auto *VD = 7664 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7665 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7666 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7667 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7668 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7669 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7670 RequiresReference = true; 7671 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7672 } 7673 } 7674 } 7675 7676 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7677 // the last component), the base has to be the pointer itself, not its 7678 // reference. References are ignored for mapping purposes. 7679 QualType Ty = 7680 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7681 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7682 // No need to generate individual map information for the pointer, it 7683 // can be associated with the combined storage if shared memory mode is 7684 // active or the base declaration is not global variable. 7685 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7686 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7687 !VD || VD->hasLocalStorage()) 7688 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7689 else 7690 FirstPointerInComplexData = true; 7691 ++I; 7692 } 7693 } 7694 7695 // Track whether a component of the list should be marked as MEMBER_OF some 7696 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7697 // in a component list should be marked as MEMBER_OF, all subsequent entries 7698 // do not belong to the base struct. E.g. 7699 // struct S2 s; 7700 // s.ps->ps->ps->f[:] 7701 // (1) (2) (3) (4) 7702 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7703 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7704 // is the pointee of ps(2) which is not member of struct s, so it should not 7705 // be marked as such (it is still PTR_AND_OBJ). 7706 // The variable is initialized to false so that PTR_AND_OBJ entries which 7707 // are not struct members are not considered (e.g. array of pointers to 7708 // data). 7709 bool ShouldBeMemberOf = false; 7710 7711 // Variable keeping track of whether or not we have encountered a component 7712 // in the component list which is a member expression. 
Useful when we have a 7713 // pointer or a final array section, in which case it is the previous 7714 // component in the list which tells us whether we have a member expression. 7715 // E.g. X.f[:] 7716 // While processing the final array section "[:]" it is "f" which tells us 7717 // whether we are dealing with a member of a declared struct. 7718 const MemberExpr *EncounteredME = nullptr; 7719 7720 // Track for the total number of dimension. Start from one for the dummy 7721 // dimension. 7722 uint64_t DimSize = 1; 7723 7724 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7725 bool IsPrevMemberReference = false; 7726 7727 for (; I != CE; ++I) { 7728 // If the current component is member of a struct (parent struct) mark it. 7729 if (!EncounteredME) { 7730 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7731 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7732 // as MEMBER_OF the parent struct. 7733 if (EncounteredME) { 7734 ShouldBeMemberOf = true; 7735 // Do not emit as complex pointer if this is actually not array-like 7736 // expression. 7737 if (FirstPointerInComplexData) { 7738 QualType Ty = std::prev(I) 7739 ->getAssociatedDeclaration() 7740 ->getType() 7741 .getNonReferenceType(); 7742 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7743 FirstPointerInComplexData = false; 7744 } 7745 } 7746 } 7747 7748 auto Next = std::next(I); 7749 7750 // We need to generate the addresses and sizes if this is the last 7751 // component, if the component is a pointer or if it is an array section 7752 // whose length can't be proved to be one. If this is a pointer, it 7753 // becomes the base address for the following components. 7754 7755 // A final array section, is one whose length can't be proved to be one. 7756 // If the map item is non-contiguous then we don't treat any array section 7757 // as final array section. 
7758 bool IsFinalArraySection = 7759 !IsNonContiguous && 7760 isFinalArraySectionExpression(I->getAssociatedExpression()); 7761 7762 // If we have a declaration for the mapping use that, otherwise use 7763 // the base declaration of the map clause. 7764 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7765 ? I->getAssociatedDeclaration() 7766 : BaseDecl; 7767 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7768 : MapExpr; 7769 7770 // Get information on whether the element is a pointer. Have to do a 7771 // special treatment for array sections given that they are built-in 7772 // types. 7773 const auto *OASE = 7774 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7775 const auto *OAShE = 7776 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7777 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7778 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7779 bool IsPointer = 7780 OAShE || 7781 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7782 .getCanonicalType() 7783 ->isAnyPointerType()) || 7784 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7785 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7786 MapDecl && 7787 MapDecl->getType()->isLValueReferenceType(); 7788 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7789 7790 if (OASE) 7791 ++DimSize; 7792 7793 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7794 IsFinalArraySection) { 7795 // If this is not the last component, we expect the pointer to be 7796 // associated with an array expression or member expression. 
7797 assert((Next == CE || 7798 isa<MemberExpr>(Next->getAssociatedExpression()) || 7799 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7800 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7801 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7802 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7803 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7804 "Unexpected expression"); 7805 7806 Address LB = Address::invalid(); 7807 Address LowestElem = Address::invalid(); 7808 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7809 const MemberExpr *E) { 7810 const Expr *BaseExpr = E->getBase(); 7811 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7812 // scalar. 7813 LValue BaseLV; 7814 if (E->isArrow()) { 7815 LValueBaseInfo BaseInfo; 7816 TBAAAccessInfo TBAAInfo; 7817 Address Addr = 7818 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7819 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7820 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7821 } else { 7822 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7823 } 7824 return BaseLV; 7825 }; 7826 if (OAShE) { 7827 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7828 CGF.getContext().getTypeAlignInChars( 7829 OAShE->getBase()->getType())); 7830 } else if (IsMemberReference) { 7831 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7832 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7833 LowestElem = CGF.EmitLValueForFieldInitialization( 7834 BaseLVal, cast<FieldDecl>(MapDecl)) 7835 .getAddress(CGF); 7836 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7837 .getAddress(CGF); 7838 } else { 7839 LowestElem = LB = 7840 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7841 .getAddress(CGF); 7842 } 7843 7844 // If this component is a pointer inside the base struct then we don't 7845 // need to create any entry for it - it will be combined with the object 7846 // it is 
pointing to into a single PTR_AND_OBJ entry. 7847 bool IsMemberPointerOrAddr = 7848 EncounteredME && 7849 (((IsPointer || ForDeviceAddr) && 7850 I->getAssociatedExpression() == EncounteredME) || 7851 (IsPrevMemberReference && !IsPointer) || 7852 (IsMemberReference && Next != CE && 7853 !Next->getAssociatedExpression()->getType()->isPointerType())); 7854 if (!OverlappedElements.empty() && Next == CE) { 7855 // Handle base element with the info for overlapped elements. 7856 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7857 assert(!IsPointer && 7858 "Unexpected base element with the pointer type."); 7859 // Mark the whole struct as the struct that requires allocation on the 7860 // device. 7861 PartialStruct.LowestElem = {0, LowestElem}; 7862 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7863 I->getAssociatedExpression()->getType()); 7864 Address HB = CGF.Builder.CreateConstGEP( 7865 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7866 CGF.VoidPtrTy), 7867 TypeSize.getQuantity() - 1); 7868 PartialStruct.HighestElem = { 7869 std::numeric_limits<decltype( 7870 PartialStruct.HighestElem.first)>::max(), 7871 HB}; 7872 PartialStruct.Base = BP; 7873 PartialStruct.LB = LB; 7874 assert( 7875 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7876 "Overlapped elements must be used only once for the variable."); 7877 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7878 // Emit data for non-overlapped data. 7879 OpenMPOffloadMappingFlags Flags = 7880 OMP_MAP_MEMBER_OF | 7881 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7882 /*AddPtrFlag=*/false, 7883 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7884 llvm::Value *Size = nullptr; 7885 // Do bitcopy of all non-overlapped structure elements. 
7886 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7887 Component : OverlappedElements) { 7888 Address ComponentLB = Address::invalid(); 7889 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7890 Component) { 7891 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7892 const auto *FD = dyn_cast<FieldDecl>(VD); 7893 if (FD && FD->getType()->isLValueReferenceType()) { 7894 const auto *ME = 7895 cast<MemberExpr>(MC.getAssociatedExpression()); 7896 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7897 ComponentLB = 7898 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7899 .getAddress(CGF); 7900 } else { 7901 ComponentLB = 7902 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7903 .getAddress(CGF); 7904 } 7905 Size = CGF.Builder.CreatePtrDiff( 7906 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7907 CGF.EmitCastToVoidPtr(LB.getPointer())); 7908 break; 7909 } 7910 } 7911 assert(Size && "Failed to determine structure size"); 7912 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7913 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7914 CombinedInfo.Pointers.push_back(LB.getPointer()); 7915 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7916 Size, CGF.Int64Ty, /*isSigned=*/true)); 7917 CombinedInfo.Types.push_back(Flags); 7918 CombinedInfo.Mappers.push_back(nullptr); 7919 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7920 : 1); 7921 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7922 } 7923 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7924 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7925 CombinedInfo.Pointers.push_back(LB.getPointer()); 7926 Size = CGF.Builder.CreatePtrDiff( 7927 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7928 CGF.EmitCastToVoidPtr(LB.getPointer())); 7929 CombinedInfo.Sizes.push_back( 7930 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7931 CombinedInfo.Types.push_back(Flags); 7932 CombinedInfo.Mappers.push_back(nullptr); 7933 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7934 : 1); 7935 break; 7936 } 7937 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7938 if (!IsMemberPointerOrAddr || 7939 (Next == CE && MapType != OMPC_MAP_unknown)) { 7940 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7941 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7942 CombinedInfo.Pointers.push_back(LB.getPointer()); 7943 CombinedInfo.Sizes.push_back( 7944 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7945 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7946 : 1); 7947 7948 // If Mapper is valid, the last component inherits the mapper. 7949 bool HasMapper = Mapper && Next == CE; 7950 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7951 7952 // We need to add a pointer flag for each map that comes from the 7953 // same expression except for the first one. We also need to signal 7954 // this map is the first one that relates with the current capture 7955 // (there is a set of entries for each capture). 
7956 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7957 MapType, MapModifiers, MotionModifiers, IsImplicit, 7958 !IsExpressionFirstInfo || RequiresReference || 7959 FirstPointerInComplexData || IsMemberReference, 7960 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7961 7962 if (!IsExpressionFirstInfo || IsMemberReference) { 7963 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7964 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7965 if (IsPointer || (IsMemberReference && Next != CE)) 7966 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7967 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7968 7969 if (ShouldBeMemberOf) { 7970 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7971 // should be later updated with the correct value of MEMBER_OF. 7972 Flags |= OMP_MAP_MEMBER_OF; 7973 // From now on, all subsequent PTR_AND_OBJ entries should not be 7974 // marked as MEMBER_OF. 7975 ShouldBeMemberOf = false; 7976 } 7977 } 7978 7979 CombinedInfo.Types.push_back(Flags); 7980 } 7981 7982 // If we have encountered a member expression so far, keep track of the 7983 // mapped member. If the parent is "*this", then the value declaration 7984 // is nullptr. 
7985 if (EncounteredME) { 7986 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7987 unsigned FieldIndex = FD->getFieldIndex(); 7988 7989 // Update info about the lowest and highest elements for this struct 7990 if (!PartialStruct.Base.isValid()) { 7991 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7992 if (IsFinalArraySection) { 7993 Address HB = 7994 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7995 .getAddress(CGF); 7996 PartialStruct.HighestElem = {FieldIndex, HB}; 7997 } else { 7998 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7999 } 8000 PartialStruct.Base = BP; 8001 PartialStruct.LB = BP; 8002 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8003 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8004 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8005 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8006 } 8007 } 8008 8009 // Need to emit combined struct for array sections. 8010 if (IsFinalArraySection || IsNonContiguous) 8011 PartialStruct.IsArraySection = true; 8012 8013 // If we have a final array section, we are done with this expression. 8014 if (IsFinalArraySection) 8015 break; 8016 8017 // The pointer becomes the base for the next element. 8018 if (Next != CE) 8019 BP = IsMemberReference ? LowestElem : LB; 8020 8021 IsExpressionFirstInfo = false; 8022 IsCaptureFirstInfo = false; 8023 FirstPointerInComplexData = false; 8024 IsPrevMemberReference = IsMemberReference; 8025 } else if (FirstPointerInComplexData) { 8026 QualType Ty = Components.rbegin() 8027 ->getAssociatedDeclaration() 8028 ->getType() 8029 .getNonReferenceType(); 8030 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8031 FirstPointerInComplexData = false; 8032 } 8033 } 8034 // If ran into the whole component - allocate the space for the whole 8035 // record. 
8036 if (!EncounteredME) 8037 PartialStruct.HasCompleteRecord = true; 8038 8039 if (!IsNonContiguous) 8040 return; 8041 8042 const ASTContext &Context = CGF.getContext(); 8043 8044 // For supporting stride in array section, we need to initialize the first 8045 // dimension size as 1, first offset as 0, and first count as 1 8046 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8047 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8048 MapValuesArrayTy CurStrides; 8049 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8050 uint64_t ElementTypeSize; 8051 8052 // Collect Size information for each dimension and get the element size as 8053 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8054 // should be [10, 10] and the first stride is 4 btyes. 8055 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8056 Components) { 8057 const Expr *AssocExpr = Component.getAssociatedExpression(); 8058 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8059 8060 if (!OASE) 8061 continue; 8062 8063 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8064 auto *CAT = Context.getAsConstantArrayType(Ty); 8065 auto *VAT = Context.getAsVariableArrayType(Ty); 8066 8067 // We need all the dimension size except for the last dimension. 8068 assert((VAT || CAT || &Component == &*Components.begin()) && 8069 "Should be either ConstantArray or VariableArray if not the " 8070 "first Component"); 8071 8072 // Get element size if CurStrides is empty. 
8073 if (CurStrides.empty()) { 8074 const Type *ElementType = nullptr; 8075 if (CAT) 8076 ElementType = CAT->getElementType().getTypePtr(); 8077 else if (VAT) 8078 ElementType = VAT->getElementType().getTypePtr(); 8079 else 8080 assert(&Component == &*Components.begin() && 8081 "Only expect pointer (non CAT or VAT) when this is the " 8082 "first Component"); 8083 // If ElementType is null, then it means the base is a pointer 8084 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8085 // for next iteration. 8086 if (ElementType) { 8087 // For the case that having pointer as base, we need to remove one 8088 // level of indirection. 8089 if (&Component != &*Components.begin()) 8090 ElementType = ElementType->getPointeeOrArrayElementType(); 8091 ElementTypeSize = 8092 Context.getTypeSizeInChars(ElementType).getQuantity(); 8093 CurStrides.push_back( 8094 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8095 } 8096 } 8097 // Get dimension value except for the last dimension since we don't need 8098 // it. 8099 if (DimSizes.size() < Components.size() - 1) { 8100 if (CAT) 8101 DimSizes.push_back(llvm::ConstantInt::get( 8102 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8103 else if (VAT) 8104 DimSizes.push_back(CGF.Builder.CreateIntCast( 8105 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8106 /*IsSigned=*/false)); 8107 } 8108 } 8109 8110 // Skip the dummy dimension since we have already have its information. 8111 auto DI = DimSizes.begin() + 1; 8112 // Product of dimension. 8113 llvm::Value *DimProd = 8114 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8115 8116 // Collect info for non-contiguous. Notice that offset, count, and stride 8117 // are only meaningful for array-section, so we insert a null for anything 8118 // other than array-section. 8119 // Also, the size of offset, count, and stride are not the same as 8120 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8121 // count, and stride are the same as the number of non-contiguous 8122 // declaration in target update to/from clause. 8123 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8124 Components) { 8125 const Expr *AssocExpr = Component.getAssociatedExpression(); 8126 8127 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8128 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8129 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8130 /*isSigned=*/false); 8131 CurOffsets.push_back(Offset); 8132 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8133 CurStrides.push_back(CurStrides.back()); 8134 continue; 8135 } 8136 8137 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8138 8139 if (!OASE) 8140 continue; 8141 8142 // Offset 8143 const Expr *OffsetExpr = OASE->getLowerBound(); 8144 llvm::Value *Offset = nullptr; 8145 if (!OffsetExpr) { 8146 // If offset is absent, then we just set it to zero. 8147 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8148 } else { 8149 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8150 CGF.Int64Ty, 8151 /*isSigned=*/false); 8152 } 8153 CurOffsets.push_back(Offset); 8154 8155 // Count 8156 const Expr *CountExpr = OASE->getLength(); 8157 llvm::Value *Count = nullptr; 8158 if (!CountExpr) { 8159 // In Clang, once a high dimension is an array section, we construct all 8160 // the lower dimension as array section, however, for case like 8161 // arr[0:2][2], Clang construct the inner dimension as an array section 8162 // but it actually is not in an array section form according to spec. 8163 if (!OASE->getColonLocFirst().isValid() && 8164 !OASE->getColonLocSecond().isValid()) { 8165 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8166 } else { 8167 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8168 // When the length is absent it defaults to ⌈(size − 8169 // lower-bound)/stride⌉, where size is the size of the array 8170 // dimension. 8171 const Expr *StrideExpr = OASE->getStride(); 8172 llvm::Value *Stride = 8173 StrideExpr 8174 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8175 CGF.Int64Ty, /*isSigned=*/false) 8176 : nullptr; 8177 if (Stride) 8178 Count = CGF.Builder.CreateUDiv( 8179 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8180 else 8181 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8182 } 8183 } else { 8184 Count = CGF.EmitScalarExpr(CountExpr); 8185 } 8186 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8187 CurCounts.push_back(Count); 8188 8189 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8190 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8191 // Offset Count Stride 8192 // D0 0 1 4 (int) <- dummy dimension 8193 // D1 0 2 8 (2 * (1) * 4) 8194 // D2 1 2 20 (1 * (1 * 5) * 4) 8195 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8196 const Expr *StrideExpr = OASE->getStride(); 8197 llvm::Value *Stride = 8198 StrideExpr 8199 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8200 CGF.Int64Ty, /*isSigned=*/false) 8201 : nullptr; 8202 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8203 if (Stride) 8204 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8205 else 8206 CurStrides.push_back(DimProd); 8207 if (DI != DimSizes.end()) 8208 ++DI; 8209 } 8210 8211 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8212 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8213 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8214 } 8215 8216 /// Return the adjusted map modifiers if the declaration a capture refers to 8217 /// appears in a first-private clause. This is expected to be used only with 8218 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // A firstprivate pointer is mapped as a pointer-with-pointee pair
      // instead of a private copy.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not known to be firstprivate here: default to 'tofrom'.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag encoding \p Position + 1 (a 1-based index into
  /// the generated map-argument arrays) in the MEMBER_OF bit-field.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with the concrete
  /// \p MemberOfFlag value, where applicable.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \p Layout the non-bitfield, non-zero-size fields of \p RD —
  /// including those of its non-empty (virtual and non-virtual) bases,
  /// recursively — in LLVM struct-field order.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Scratch array indexed by LLVM field number; each slot holds either a
    // base class or a field occupying that slot (or null).
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Do not overwrite a slot already claimed by a non-virtual base.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Flatten: recurse into bases, emit fields in slot order.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo).
  /// Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // 'Total' is only used to size the per-declaration bucket vector.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect 'map' clause component lists, classified by kind.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect 'to' clause component lists (treated as 'map to').
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect 'from' clause component lists (treated as 'map from').
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled at most once.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit map information for every collected declaration, one chunk per
    // declaration so struct members stay grouped.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for executable directives: pre-extracts firstprivate,
  /// uses_allocators, and is_device_ptr clause information.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // Nothing to combine: a single entry without the MEMBER_OF placeholder
    // that is not an array section needs no extra combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds collapse to the record's lower bound.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // Only valid when this handler was built for an executable directive
    // (not a declare mapper).
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the map clauses of a user-defined mapper directive.
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda closures are of interest here.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Captured 'this' is emitted as a PTR_AND_OBJ entry of pointer size.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Skip by-copy captures that are not pointers.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the pointer value itself, size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with exactly the flag combination emitted by
      // generateInfoForLambdaCaptures are adjusted.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Search backwards for the entry whose pointer is the enclosing lambda.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One MapData tuple per map-clause component list that mentions VD:
    // (components, map type, modifiers, is-implicit, mapper, var-ref expr).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Sort so entries with 'present' or 'alloc' sink appropriately.
    // NOTE(review): the comparator reads modifiers from LHS but the map type
    // from RHS (and vice versa for the R-side values) — this mirrors the
    // as-written logic; confirm against the intended ordering before changing.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list; walk both component lists from the
      // base (reverse iteration) until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order the overlapped component lists by declaration order of the
    // diverging field (falling back to the flattened record layout).
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // Firstprivate pointers are passed by their pointee's address.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit, for every non-contiguous mapped entry, an on-stack array of
/// descriptor_dim structs (offset/count/stride per dimension) and store its
/// address into the corresponding slot of the offload pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Dimensions are stored innermost-first, hence the reversed index.
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
9258 llvm::Constant * 9259 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9260 MappableExprsHandler::MappingExprInfo &MapExprs) { 9261 llvm::Constant *SrcLocStr; 9262 if (!MapExprs.getMapDecl()) { 9263 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9264 } else { 9265 std::string ExprName = ""; 9266 if (MapExprs.getMapExpr()) { 9267 PrintingPolicy P(CGF.getContext().getLangOpts()); 9268 llvm::raw_string_ostream OS(ExprName); 9269 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9270 OS.flush(); 9271 } else { 9272 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9273 } 9274 9275 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9276 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9277 const char *FileName = PLoc.getFilename(); 9278 unsigned Line = PLoc.getLine(); 9279 unsigned Column = PLoc.getColumn(); 9280 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9281 Line, Column); 9282 } 9283 return SrcLocStr; 9284 } 9285 9286 /// Emit the arrays used to pass the captures and map information to the 9287 /// offloading runtime library. If there is no map or capture information, 9288 /// return nullptr by reference. 9289 static void emitOffloadingArrays( 9290 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9291 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9292 bool IsNonContiguous = false) { 9293 CodeGenModule &CGM = CGF.CGM; 9294 ASTContext &Ctx = CGF.getContext(); 9295 9296 // Reset the array information. 9297 Info.clearArrayInfo(); 9298 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9299 9300 if (Info.NumberOfPtrs) { 9301 // Detect if we have any capture size requiring runtime evaluation of the 9302 // size so that a constant array could be eventually used. 
9303 bool hasRuntimeEvaluationCaptureSize = false; 9304 for (llvm::Value *S : CombinedInfo.Sizes) 9305 if (!isa<llvm::Constant>(S)) { 9306 hasRuntimeEvaluationCaptureSize = true; 9307 break; 9308 } 9309 9310 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9311 QualType PointerArrayType = Ctx.getConstantArrayType( 9312 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9313 /*IndexTypeQuals=*/0); 9314 9315 Info.BasePointersArray = 9316 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9317 Info.PointersArray = 9318 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9319 Address MappersArray = 9320 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9321 Info.MappersArray = MappersArray.getPointer(); 9322 9323 // If we don't have any VLA types or other types that require runtime 9324 // evaluation, we can use a constant array for the map sizes, otherwise we 9325 // need to fill up the arrays as we do for the pointers. 9326 QualType Int64Ty = 9327 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9328 if (hasRuntimeEvaluationCaptureSize) { 9329 QualType SizeArrayType = Ctx.getConstantArrayType( 9330 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9331 /*IndexTypeQuals=*/0); 9332 Info.SizesArray = 9333 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9334 } else { 9335 // We expect all the sizes to be constant, so we collect them to create 9336 // a constant array. 
9337 SmallVector<llvm::Constant *, 16> ConstSizes; 9338 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9339 if (IsNonContiguous && 9340 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9341 ConstSizes.push_back(llvm::ConstantInt::get( 9342 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9343 } else { 9344 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9345 } 9346 } 9347 9348 auto *SizesArrayInit = llvm::ConstantArray::get( 9349 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9350 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9351 auto *SizesArrayGbl = new llvm::GlobalVariable( 9352 CGM.getModule(), SizesArrayInit->getType(), 9353 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9354 SizesArrayInit, Name); 9355 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9356 Info.SizesArray = SizesArrayGbl; 9357 } 9358 9359 // The map types are always constant so we don't need to generate code to 9360 // fill arrays. Instead, we create an array constant. 9361 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9362 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9363 std::string MaptypesName = 9364 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9365 auto *MapTypesArrayGbl = 9366 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9367 Info.MapTypesArray = MapTypesArrayGbl; 9368 9369 // The information types are only built if there is debug information 9370 // requested. 
9371 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9372 Info.MapNamesArray = llvm::Constant::getNullValue( 9373 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9374 } else { 9375 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9376 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9377 }; 9378 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9379 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9380 std::string MapnamesName = 9381 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9382 auto *MapNamesArrayGbl = 9383 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9384 Info.MapNamesArray = MapNamesArrayGbl; 9385 } 9386 9387 // If there's a present map type modifier, it must not be applied to the end 9388 // of a region, so generate a separate map type array in that case. 9389 if (Info.separateBeginEndCalls()) { 9390 bool EndMapTypesDiffer = false; 9391 for (uint64_t &Type : Mapping) { 9392 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9393 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9394 EndMapTypesDiffer = true; 9395 } 9396 } 9397 if (EndMapTypesDiffer) { 9398 MapTypesArrayGbl = 9399 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9400 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9401 } 9402 } 9403 9404 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9405 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9406 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9407 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9408 Info.BasePointersArray, 0, I); 9409 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9410 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9411 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9412 CGF.Builder.CreateStore(BPVal, BPAddr); 9413 9414 if (Info.requiresDevicePointerInfo()) 9415 if (const ValueDecl *DevVD = 9416 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 
9417 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9418 9419 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9420 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9421 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9422 Info.PointersArray, 0, I); 9423 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9424 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9425 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9426 CGF.Builder.CreateStore(PVal, PAddr); 9427 9428 if (hasRuntimeEvaluationCaptureSize) { 9429 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9430 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9431 Info.SizesArray, 9432 /*Idx0=*/0, 9433 /*Idx1=*/I); 9434 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9435 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9436 CGM.Int64Ty, 9437 /*isSigned=*/true), 9438 SAddr); 9439 } 9440 9441 // Fill up the mapper array. 9442 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9443 if (CombinedInfo.Mappers[I]) { 9444 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9445 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9446 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9447 Info.HasMapper = true; 9448 } 9449 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9450 CGF.Builder.CreateStore(MFunc, MAddr); 9451 } 9452 } 9453 9454 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9455 Info.NumberOfPtrs == 0) 9456 return; 9457 9458 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9459 } 9460 9461 namespace { 9462 /// Additional arguments for emitOffloadingArraysArgument function. 
struct ArgumentsOptions {
  // When true, emit the map-types array for the end of the region (which may
  // differ from the begin array if a 'present' modifier was stripped).
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No mapped entries: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
9532 static const OMPExecutableDirective * 9533 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9534 const auto *CS = D.getInnermostCapturedStmt(); 9535 const auto *Body = 9536 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9537 const Stmt *ChildStmt = 9538 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9539 9540 if (const auto *NestedDir = 9541 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9542 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9543 switch (D.getDirectiveKind()) { 9544 case OMPD_target: 9545 if (isOpenMPDistributeDirective(DKind)) 9546 return NestedDir; 9547 if (DKind == OMPD_teams) { 9548 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9549 /*IgnoreCaptured=*/true); 9550 if (!Body) 9551 return nullptr; 9552 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9553 if (const auto *NND = 9554 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9555 DKind = NND->getDirectiveKind(); 9556 if (isOpenMPDistributeDirective(DKind)) 9557 return NND; 9558 } 9559 } 9560 return nullptr; 9561 case OMPD_target_teams: 9562 if (isOpenMPDistributeDirective(DKind)) 9563 return NestedDir; 9564 return nullptr; 9565 case OMPD_target_parallel: 9566 case OMPD_target_simd: 9567 case OMPD_target_parallel_for: 9568 case OMPD_target_parallel_for_simd: 9569 return nullptr; 9570 case OMPD_target_teams_distribute: 9571 case OMPD_target_teams_distribute_simd: 9572 case OMPD_target_teams_distribute_parallel_for: 9573 case OMPD_target_teams_distribute_parallel_for_simd: 9574 case OMPD_parallel: 9575 case OMPD_for: 9576 case OMPD_parallel_for: 9577 case OMPD_parallel_master: 9578 case OMPD_parallel_sections: 9579 case OMPD_for_simd: 9580 case OMPD_parallel_for_simd: 9581 case OMPD_cancel: 9582 case OMPD_cancellation_point: 9583 case OMPD_ordered: 9584 case OMPD_threadprivate: 9585 case OMPD_allocate: 9586 case OMPD_task: 9587 case OMPD_simd: 9588 case OMPD_tile: 9589 
case OMPD_unroll: 9590 case OMPD_sections: 9591 case OMPD_section: 9592 case OMPD_single: 9593 case OMPD_master: 9594 case OMPD_critical: 9595 case OMPD_taskyield: 9596 case OMPD_barrier: 9597 case OMPD_taskwait: 9598 case OMPD_taskgroup: 9599 case OMPD_atomic: 9600 case OMPD_flush: 9601 case OMPD_depobj: 9602 case OMPD_scan: 9603 case OMPD_teams: 9604 case OMPD_target_data: 9605 case OMPD_target_exit_data: 9606 case OMPD_target_enter_data: 9607 case OMPD_distribute: 9608 case OMPD_distribute_simd: 9609 case OMPD_distribute_parallel_for: 9610 case OMPD_distribute_parallel_for_simd: 9611 case OMPD_teams_distribute: 9612 case OMPD_teams_distribute_simd: 9613 case OMPD_teams_distribute_parallel_for: 9614 case OMPD_teams_distribute_parallel_for_simd: 9615 case OMPD_target_update: 9616 case OMPD_declare_simd: 9617 case OMPD_declare_variant: 9618 case OMPD_begin_declare_variant: 9619 case OMPD_end_declare_variant: 9620 case OMPD_declare_target: 9621 case OMPD_end_declare_target: 9622 case OMPD_declare_reduction: 9623 case OMPD_declare_mapper: 9624 case OMPD_taskloop: 9625 case OMPD_taskloop_simd: 9626 case OMPD_master_taskloop: 9627 case OMPD_master_taskloop_simd: 9628 case OMPD_parallel_master_taskloop: 9629 case OMPD_parallel_master_taskloop_simd: 9630 case OMPD_requires: 9631 case OMPD_unknown: 9632 default: 9633 llvm_unreachable("Unexpected directive."); 9634 } 9635 } 9636 9637 return nullptr; 9638 } 9639 9640 /// Emit the user-defined mapper function. The code generation follows the 9641 /// pattern in the example below. 9642 /// \code 9643 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9644 /// void *base, void *begin, 9645 /// int64_t size, int64_t type, 9646 /// void *name = nullptr) { 9647 /// // Allocate space for an array section first or add a base/begin for 9648 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper is emitted at most once per module.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The parameter order
  // mirrors the \code example above: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mangled type name makes the mapper symbol unique per mapped type.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop OptimizeNone so the generated glue can be optimized regardless of the
  // attributes SetInternalFunctionAttributes installed.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // 'size' arrives in bytes; it is converted to an element count below.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that eventually branches back to BodyBB; the
  // per-element map-type selection below moves it to its merge block.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is enabled.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // The tofrom case reaches EndBB straight from ToElseBB with MemberMapType
    // unchanged; the other three arms contribute their masked values.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; FunctionUDMMap records which mappers the
  // current function (if any) depends on.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is mapped when more than one element is present.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function emitted for \p D, emitting it first if it has
/// not been generated yet.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // emitUserDefinedMapper populates UDMMap as a side effect.
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  // If D is not itself a teams-distribute loop, look for one nested inside.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // SizeEmitter may fail to compute a trip count; emit nothing in that case.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the target invocation in a task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
10051 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10052 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10053 &CapturedVars, 10054 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10055 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10056 // Reverse offloading is not supported, so just execute on the host. 10057 if (RequiresOuterTask) { 10058 CapturedVars.clear(); 10059 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10060 } 10061 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10062 return; 10063 } 10064 10065 // On top of the arrays that were filled up, the target offloading call 10066 // takes as arguments the device id as well as the host pointer. The host 10067 // pointer is used by the runtime library to identify the current target 10068 // region, so it only has to be unique and not necessarily point to 10069 // anything. It could be the pointer to the outlined function that 10070 // implements the target region, but we aren't using that so that the 10071 // compiler doesn't need to keep that, and could therefore inline the host 10072 // function if proven worthwhile during optimization. 10073 10074 // From this point on, we need to have an ID of the target region defined. 10075 assert(OutlinedFnID && "Invalid outlined function ID!"); 10076 10077 // Emit device ID if any. 10078 llvm::Value *DeviceID; 10079 if (Device.getPointer()) { 10080 assert((Device.getInt() == OMPC_DEVICE_unknown || 10081 Device.getInt() == OMPC_DEVICE_device_num) && 10082 "Expected device_num modifier."); 10083 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10084 DeviceID = 10085 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10086 } else { 10087 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10088 } 10089 10090 // Emit the number of elements in the offloading arrays. 
10091 llvm::Value *PointerNum = 10092 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10093 10094 // Return value of the runtime offloading call. 10095 llvm::Value *Return; 10096 10097 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10098 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10099 10100 // Source location for the ident struct 10101 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10102 10103 // Emit tripcount for the target loop-based directive. 10104 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10105 10106 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10107 // The target region is an outlined function launched by the runtime 10108 // via calls __tgt_target() or __tgt_target_teams(). 10109 // 10110 // __tgt_target() launches a target region with one team and one thread, 10111 // executing a serial region. This master thread may in turn launch 10112 // more threads within its team upon encountering a parallel region, 10113 // however, no additional teams can be launched on the device. 10114 // 10115 // __tgt_target_teams() launches a target region with one or more teams, 10116 // each with one or more threads. This call is required for target 10117 // constructs such as: 10118 // 'target teams' 10119 // 'target' / 'teams' 10120 // 'target teams distribute parallel for' 10121 // 'target parallel' 10122 // and so on. 10123 // 10124 // Note that on the host and CPU targets, the runtime implementation of 10125 // these calls simply call the outlined function without forking threads. 10126 // The outlined functions themselves have runtime calls to 10127 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10128 // the compiler in emitTeamsCall() and emitParallelCall(). 
10129 // 10130 // In contrast, on the NVPTX target, the implementation of 10131 // __tgt_target_teams() launches a GPU kernel with the requested number 10132 // of teams and threads so no additional calls to the runtime are required. 10133 if (NumTeams) { 10134 // If we have NumTeams defined this means that we have an enclosed teams 10135 // region. Therefore we also expect to have NumThreads defined. These two 10136 // values should be defined in the presence of a teams directive, 10137 // regardless of having any clauses associated. If the user is using teams 10138 // but no clauses, these two values will be the default that should be 10139 // passed to the runtime library - a 32-bit integer with the value zero. 10140 assert(NumThreads && "Thread limit expression should be available along " 10141 "with number of teams."); 10142 llvm::Value *OffloadingArgs[] = {RTLoc, 10143 DeviceID, 10144 OutlinedFnID, 10145 PointerNum, 10146 InputInfo.BasePointersArray.getPointer(), 10147 InputInfo.PointersArray.getPointer(), 10148 InputInfo.SizesArray.getPointer(), 10149 MapTypesArray, 10150 MapNamesArray, 10151 InputInfo.MappersArray.getPointer(), 10152 NumTeams, 10153 NumThreads}; 10154 Return = CGF.EmitRuntimeCall( 10155 OMPBuilder.getOrCreateRuntimeFunction( 10156 CGM.getModule(), HasNowait 10157 ? OMPRTL___tgt_target_teams_nowait_mapper 10158 : OMPRTL___tgt_target_teams_mapper), 10159 OffloadingArgs); 10160 } else { 10161 llvm::Value *OffloadingArgs[] = {RTLoc, 10162 DeviceID, 10163 OutlinedFnID, 10164 PointerNum, 10165 InputInfo.BasePointersArray.getPointer(), 10166 InputInfo.PointersArray.getPointer(), 10167 InputInfo.SizesArray.getPointer(), 10168 MapTypesArray, 10169 MapNamesArray, 10170 InputInfo.MappersArray.getPointer()}; 10171 Return = CGF.EmitRuntimeCall( 10172 OMPBuilder.getOrCreateRuntimeFunction( 10173 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10174 : OMPRTL___tgt_target_mapper), 10175 OffloadingArgs); 10176 } 10177 10178 // Check the error code and execute the host version if required. 10179 llvm::BasicBlock *OffloadFailedBlock = 10180 CGF.createBasicBlock("omp_offload.failed"); 10181 llvm::BasicBlock *OffloadContBlock = 10182 CGF.createBasicBlock("omp_offload.cont"); 10183 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10184 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10185 10186 CGF.EmitBlock(OffloadFailedBlock); 10187 if (RequiresOuterTask) { 10188 CapturedVars.clear(); 10189 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10190 } 10191 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10192 CGF.EmitBranch(OffloadContBlock); 10193 10194 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10195 }; 10196 10197 // Notify that the host version must be executed. 10198 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10199 RequiresOuterTask](CodeGenFunction &CGF, 10200 PrePostActionTy &) { 10201 if (RequiresOuterTask) { 10202 CapturedVars.clear(); 10203 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10204 } 10205 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10206 }; 10207 10208 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10209 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10210 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10211 // Fill up the arrays with all the captured variables. 10212 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10213 10214 // Get mappable expression information. 
10215 MappableExprsHandler MEHandler(D, CGF); 10216 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10217 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10218 10219 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10220 auto *CV = CapturedVars.begin(); 10221 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10222 CE = CS.capture_end(); 10223 CI != CE; ++CI, ++RI, ++CV) { 10224 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10225 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10226 10227 // VLA sizes are passed to the outlined region by copy and do not have map 10228 // information associated. 10229 if (CI->capturesVariableArrayType()) { 10230 CurInfo.Exprs.push_back(nullptr); 10231 CurInfo.BasePointers.push_back(*CV); 10232 CurInfo.Pointers.push_back(*CV); 10233 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10234 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10235 // Copy to the device as an argument. No need to retrieve it. 10236 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10237 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10238 MappableExprsHandler::OMP_MAP_IMPLICIT); 10239 CurInfo.Mappers.push_back(nullptr); 10240 } else { 10241 // If we have any information in the map clause, we use it, otherwise we 10242 // just do a default mapping. 10243 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10244 if (!CI->capturesThis()) 10245 MappedVarSet.insert(CI->getCapturedVar()); 10246 else 10247 MappedVarSet.insert(nullptr); 10248 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10249 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10250 // Generate correct mapping for variables captured by reference in 10251 // lambdas. 
10252 if (CI->capturesVariable()) 10253 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10254 CurInfo, LambdaPointers); 10255 } 10256 // We expect to have at least an element of information for this capture. 10257 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10258 "Non-existing map pointer for capture!"); 10259 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10260 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10261 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10262 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10263 "Inconsistent map information sizes!"); 10264 10265 // If there is an entry in PartialStruct it means we have a struct with 10266 // individual members mapped. Emit an extra combined entry. 10267 if (PartialStruct.Base.isValid()) { 10268 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10269 MEHandler.emitCombinedEntry( 10270 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10271 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10272 } 10273 10274 // We need to append the results of this capture to what we already have. 10275 CombinedInfo.append(CurInfo); 10276 } 10277 // Adjust MEMBER_OF flags for the lambdas captures. 10278 MEHandler.adjustMemberOfForLambdaCaptures( 10279 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10280 CombinedInfo.Types); 10281 // Map any list items in a map clause that were not captures because they 10282 // weren't referenced within the construct. 10283 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10284 10285 TargetDataInfo Info; 10286 // Fill up the arrays and create the arguments. 
10287 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10288 emitOffloadingArraysArgument( 10289 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10290 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10291 {/*ForEndTask=*/false}); 10292 10293 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10294 InputInfo.BasePointersArray = 10295 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10296 InputInfo.PointersArray = 10297 Address(Info.PointersArray, CGM.getPointerAlign()); 10298 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10299 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10300 MapTypesArray = Info.MapTypesArray; 10301 MapNamesArray = Info.MapNamesArray; 10302 if (RequiresOuterTask) 10303 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10304 else 10305 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10306 }; 10307 10308 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10309 CodeGenFunction &CGF, PrePostActionTy &) { 10310 if (RequiresOuterTask) { 10311 CodeGenFunction::OMPTargetDataInfo InputInfo; 10312 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10313 } else { 10314 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10315 } 10316 }; 10317 10318 // If we have a target function ID it means that we need to support 10319 // offloading, otherwise, just execute on the host. We need to execute on host 10320 // regardless of the conditional in the if clause if, e.g., the user do not 10321 // specify target triples. 
  // With a valid outlined function ID we may offload; otherwise the region is
  // executed unconditionally on the host (e.g. no target triples were given).
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit the corresponding device kernels. \p ParentName is the mangled name of
/// the enclosing host function and is used (with file/line info) to build the
/// unique offload entry name for each region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // Derive the unique (device-id, file-id, line) triple identifying this
    // target region's offload entry.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the kernel emitter that matches the combined directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives; the
    // guard above (isOpenMPTargetExecutionDirective) makes them unreachable
    // here. They are spelled out so new enumerators trigger a -Wswitch note.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, scan its captured body (the raw
  // statement) rather than its children, which include captured decls.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if \p VD carries a device_type clause that excludes the
/// current compilation side (\p IsDevice), i.e. the declaration must not be
/// emitted here.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

/// Decide whether the front end should skip emitting \p GD itself (returns
/// true) while still scanning it for target regions when compiling for a
/// device.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    // Emit device kernels for any target regions nested in the body.
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Returns true if the variable \p GD must be skipped by normal host/device
/// codegen (wrong device_type, not declare-target, or deferred because it is
/// a link/unified-memory entry).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. Link
  // entries (and 'to' entries under unified shared memory) are deferred until
  // the end of the TU; see emitDeferredTargetDecls.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Record \p VD / \p Addr in the offload entries table so the runtime can map
/// the host and device copies of a declare-target variable.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading is configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    // Declaration-only entries are registered with size zero.
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        // Keep the reference alive through optimizations.
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // Link entries are registered through a pointer-sized indirection
    // variable; the device side registers by name only (no address).
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatch a global to the function or variable target-emission check.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Emit (or materialize the mapped address of) all declare-target variables
/// whose emission was deferred by emitTargetGlobalVariable.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// Base-class hook: no lambda adjustment needed for the generic runtime; only
/// asserts the directive is target-based. Device runtimes override this.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effects of an 'omp requires' directive: unified shared memory
/// and the default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if
               (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Map the atomic_default_mem_order clause onto an LLVM atomic ordering.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

/// Default ordering for 'omp atomic' as set by 'requires
/// atomic_default_mem_order'.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// If \p VD has an 'omp allocate' attribute with a predefined allocator,
/// report the language address space to use in \p AS and return true.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// True if a 'requires unified_shared_memory' directive was seen in this TU.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// RAII that temporarily disables implicit declare-target marking while
/// compiling for a device; restores the previous state on destruction.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Returns true if \p GD must NOT be (re-)emitted for the device: either it is
/// already covered by declare target handling or it was marked previously.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Emission is still pending if the module only holds a declaration.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert() returns false in .second if D was already recorded.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Build the ctor-like function that passes this TU's 'requires' flags to the
/// offload runtime via __tgt_register_requires. Returns null when nothing
/// needs registering (device compile, simd-only mode, or no target entries).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit a call to __kmpc_fork_teams launching \p OutlinedFn with the captured
/// variables as trailing arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Run any cleanups for the captured arguments before the fork call returns.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit a __kmpc_push_num_teams call for num_teams/thread_limit clauses. A
/// null clause expression is encoded as 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ?
              CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                        CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the paired __tgt_target_data_begin_mapper/__tgt_target_data_end_mapper
/// calls for an 'omp target data' region, with the user body emitted in
/// between. Handles the 'if' clause by splitting into then/else code paths and
/// duplicates the body when device pointer privatization requires it.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit a standalone data-movement directive (target enter data, exit data,
/// or update) as a single mapper runtime call.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
11036 llvm::Value *DeviceID = nullptr; 11037 if (Device) { 11038 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11039 CGF.Int64Ty, /*isSigned=*/true); 11040 } else { 11041 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11042 } 11043 11044 // Emit the number of elements in the offloading arrays. 11045 llvm::Constant *PointerNum = 11046 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11047 11048 // Source location for the ident struct 11049 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11050 11051 llvm::Value *OffloadingArgs[] = {RTLoc, 11052 DeviceID, 11053 PointerNum, 11054 InputInfo.BasePointersArray.getPointer(), 11055 InputInfo.PointersArray.getPointer(), 11056 InputInfo.SizesArray.getPointer(), 11057 MapTypesArray, 11058 MapNamesArray, 11059 InputInfo.MappersArray.getPointer()}; 11060 11061 // Select the right runtime function call for each standalone 11062 // directive. 11063 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11064 RuntimeFunction RTLFn; 11065 switch (D.getDirectiveKind()) { 11066 case OMPD_target_enter_data: 11067 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11068 : OMPRTL___tgt_target_data_begin_mapper; 11069 break; 11070 case OMPD_target_exit_data: 11071 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11072 : OMPRTL___tgt_target_data_end_mapper; 11073 break; 11074 case OMPD_target_update: 11075 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11076 : OMPRTL___tgt_target_data_update_mapper; 11077 break; 11078 case OMPD_parallel: 11079 case OMPD_for: 11080 case OMPD_parallel_for: 11081 case OMPD_parallel_master: 11082 case OMPD_parallel_sections: 11083 case OMPD_for_simd: 11084 case OMPD_parallel_for_simd: 11085 case OMPD_cancel: 11086 case OMPD_cancellation_point: 11087 case OMPD_ordered: 11088 case OMPD_threadprivate: 11089 case OMPD_allocate: 11090 case OMPD_task: 11091 case OMPD_simd: 11092 case OMPD_tile: 11093 case OMPD_unroll: 11094 case OMPD_sections: 11095 case OMPD_section: 11096 case OMPD_single: 11097 case OMPD_master: 11098 case OMPD_critical: 11099 case OMPD_taskyield: 11100 case OMPD_barrier: 11101 case OMPD_taskwait: 11102 case OMPD_taskgroup: 11103 case OMPD_atomic: 11104 case OMPD_flush: 11105 case OMPD_depobj: 11106 case OMPD_scan: 11107 case OMPD_teams: 11108 case OMPD_target_data: 11109 case OMPD_distribute: 11110 case OMPD_distribute_simd: 11111 case OMPD_distribute_parallel_for: 11112 case OMPD_distribute_parallel_for_simd: 11113 case OMPD_teams_distribute: 11114 case OMPD_teams_distribute_simd: 11115 case OMPD_teams_distribute_parallel_for: 11116 case OMPD_teams_distribute_parallel_for_simd: 11117 case OMPD_declare_simd: 11118 case OMPD_declare_variant: 11119 case OMPD_begin_declare_variant: 11120 case OMPD_end_declare_variant: 11121 case OMPD_declare_target: 11122 case OMPD_end_declare_target: 11123 case OMPD_declare_reduction: 11124 case OMPD_declare_mapper: 11125 case OMPD_taskloop: 11126 case OMPD_taskloop_simd: 11127 case OMPD_master_taskloop: 11128 case OMPD_master_taskloop_simd: 11129 case OMPD_parallel_master_taskloop: 11130 case OMPD_parallel_master_taskloop_simd: 11131 case OMPD_target: 11132 case OMPD_target_simd: 11133 case OMPD_target_teams_distribute: 11134 case OMPD_target_teams_distribute_simd: 11135 case OMPD_target_teams_distribute_parallel_for: 11136 case OMPD_target_teams_distribute_parallel_for_simd: 11137 case 
OMPD_target_teams: 11138 case OMPD_target_parallel: 11139 case OMPD_target_parallel_for: 11140 case OMPD_target_parallel_for_simd: 11141 case OMPD_requires: 11142 case OMPD_unknown: 11143 default: 11144 llvm_unreachable("Unexpected standalone target data directive."); 11145 break; 11146 } 11147 CGF.EmitRuntimeCall( 11148 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11149 OffloadingArgs); 11150 }; 11151 11152 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11153 &MapNamesArray](CodeGenFunction &CGF, 11154 PrePostActionTy &) { 11155 // Fill up the arrays with all the mapped variables. 11156 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11157 11158 // Get map clause information. 11159 MappableExprsHandler MEHandler(D, CGF); 11160 MEHandler.generateAllInfo(CombinedInfo); 11161 11162 TargetDataInfo Info; 11163 // Fill up the arrays and create the arguments. 11164 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11165 /*IsNonContiguous=*/true); 11166 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11167 D.hasClausesOfKind<OMPNowaitClause>(); 11168 emitOffloadingArraysArgument( 11169 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11170 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11171 {/*ForEndTask=*/false}); 11172 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11173 InputInfo.BasePointersArray = 11174 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11175 InputInfo.PointersArray = 11176 Address(Info.PointersArray, CGM.getPointerAlign()); 11177 InputInfo.SizesArray = 11178 Address(Info.SizesArray, CGM.getPointerAlign()); 11179 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11180 MapTypesArray = Info.MapTypesArray; 11181 MapNamesArray = Info.MapNamesArray; 11182 if (RequiresOuterTask) 11183 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11184 else 11185 emitInlinedDirective(CGF, D.getDirectiveKind(), 
ThenGen); 11186 }; 11187 11188 if (IfCond) { 11189 emitIfClause(CGF, IfCond, TargetThenGen, 11190 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11191 } else { 11192 RegionCodeGenTy ThenRCG(TargetThenGen); 11193 ThenRCG(CGF); 11194 } 11195 } 11196 11197 namespace { 11198 /// Kind of parameter in a function with 'declare simd' directive. 11199 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11200 /// Attribute set of the parameter. 11201 struct ParamAttrTy { 11202 ParamKindTy Kind = Vector; 11203 llvm::APSInt StrideOrArg; 11204 llvm::APSInt Alignment; 11205 }; 11206 } // namespace 11207 11208 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11209 ArrayRef<ParamAttrTy> ParamAttrs) { 11210 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11211 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11212 // of that clause. The VLEN value must be power of 2. 11213 // In other case the notion of the function`s "characteristic data type" (CDT) 11214 // is used to compute the vector length. 11215 // CDT is defined in the following order: 11216 // a) For non-void function, the CDT is the return type. 11217 // b) If the function has any non-uniform, non-linear parameters, then the 11218 // CDT is the type of the first such parameter. 11219 // c) If the CDT determined by a) or b) above is struct, union, or class 11220 // type which is pass-by-value (except for the type that maps to the 11221 // built-in complex data type), the characteristic data type is int. 11222 // d) If none of the above three cases is applicable, the CDT is int. 11223 // The VLEN is then determined based on the CDT and the size of vector 11224 // register of that ISA for which current vector version is generated. 
The 11225 // VLEN is computed using the formula below: 11226 // VLEN = sizeof(vector_register) / sizeof(CDT), 11227 // where vector register size specified in section 3.2.1 Registers and the 11228 // Stack Frame of original AMD64 ABI document. 11229 QualType RetType = FD->getReturnType(); 11230 if (RetType.isNull()) 11231 return 0; 11232 ASTContext &C = FD->getASTContext(); 11233 QualType CDT; 11234 if (!RetType.isNull() && !RetType->isVoidType()) { 11235 CDT = RetType; 11236 } else { 11237 unsigned Offset = 0; 11238 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11239 if (ParamAttrs[Offset].Kind == Vector) 11240 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11241 ++Offset; 11242 } 11243 if (CDT.isNull()) { 11244 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11245 if (ParamAttrs[I + Offset].Kind == Vector) { 11246 CDT = FD->getParamDecl(I)->getType(); 11247 break; 11248 } 11249 } 11250 } 11251 } 11252 if (CDT.isNull()) 11253 CDT = C.IntTy; 11254 CDT = CDT->getCanonicalTypeUnqualified(); 11255 if (CDT->isRecordType() || CDT->isUnionType()) 11256 CDT = C.IntTy; 11257 return C.getTypeSize(CDT); 11258 } 11259 11260 static void 11261 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11262 const llvm::APSInt &VLENVal, 11263 ArrayRef<ParamAttrTy> ParamAttrs, 11264 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11265 struct ISADataTy { 11266 char ISA; 11267 unsigned VecRegSize; 11268 }; 11269 ISADataTy ISAData[] = { 11270 { 11271 'b', 128 11272 }, // SSE 11273 { 11274 'c', 256 11275 }, // AVX 11276 { 11277 'd', 256 11278 }, // AVX2 11279 { 11280 'e', 512 11281 }, // AVX512 11282 }; 11283 llvm::SmallVector<char, 2> Masked; 11284 switch (State) { 11285 case OMPDeclareSimdDeclAttr::BS_Undefined: 11286 Masked.push_back('N'); 11287 Masked.push_back('M'); 11288 break; 11289 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11290 Masked.push_back('N'); 11291 break; 11292 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11293 
Masked.push_back('M'); 11294 break; 11295 } 11296 for (char Mask : Masked) { 11297 for (const ISADataTy &Data : ISAData) { 11298 SmallString<256> Buffer; 11299 llvm::raw_svector_ostream Out(Buffer); 11300 Out << "_ZGV" << Data.ISA << Mask; 11301 if (!VLENVal) { 11302 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11303 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11304 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11305 } else { 11306 Out << VLENVal; 11307 } 11308 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11309 switch (ParamAttr.Kind){ 11310 case LinearWithVarStride: 11311 Out << 's' << ParamAttr.StrideOrArg; 11312 break; 11313 case Linear: 11314 Out << 'l'; 11315 if (ParamAttr.StrideOrArg != 1) 11316 Out << ParamAttr.StrideOrArg; 11317 break; 11318 case Uniform: 11319 Out << 'u'; 11320 break; 11321 case Vector: 11322 Out << 'v'; 11323 break; 11324 } 11325 if (!!ParamAttr.Alignment) 11326 Out << 'a' << ParamAttr.Alignment; 11327 } 11328 Out << '_' << Fn->getName(); 11329 Fn->addFnAttr(Out.str()); 11330 } 11331 } 11332 } 11333 11334 // This are the Functions that are needed to mangle the name of the 11335 // vector functions generated by the compiler, according to the rules 11336 // defined in the "Vector Function ABI specifications for AArch64", 11337 // available at 11338 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11339 11340 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11341 /// 11342 /// TODO: Need to implement the behavior for reference marked with a 11343 /// var or no linear modifiers (1.b in the section). For this, we 11344 /// need to extend ParamKindTy to support the linear modifiers. 
11345 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11346 QT = QT.getCanonicalType(); 11347 11348 if (QT->isVoidType()) 11349 return false; 11350 11351 if (Kind == ParamKindTy::Uniform) 11352 return false; 11353 11354 if (Kind == ParamKindTy::Linear) 11355 return false; 11356 11357 // TODO: Handle linear references with modifiers 11358 11359 if (Kind == ParamKindTy::LinearWithVarStride) 11360 return false; 11361 11362 return true; 11363 } 11364 11365 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11366 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11367 QT = QT.getCanonicalType(); 11368 unsigned Size = C.getTypeSize(QT); 11369 11370 // Only scalars and complex within 16 bytes wide set PVB to true. 11371 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11372 return false; 11373 11374 if (QT->isFloatingType()) 11375 return true; 11376 11377 if (QT->isIntegerType()) 11378 return true; 11379 11380 if (QT->isPointerType()) 11381 return true; 11382 11383 // TODO: Add support for complex types (section 3.1.2, item 2). 11384 11385 return false; 11386 } 11387 11388 /// Computes the lane size (LS) of a return type or of an input parameter, 11389 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11390 /// TODO: Add support for references, section 3.2.1, item 1. 11391 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11392 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11393 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11394 if (getAArch64PBV(PTy, C)) 11395 return C.getTypeSize(PTy); 11396 } 11397 if (getAArch64PBV(QT, C)) 11398 return C.getTypeSize(QT); 11399 11400 return C.getTypeSize(C.getUIntPtrType()); 11401 } 11402 11403 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11404 // signature of the scalar function, as defined in 3.2.2 of the 11405 // AAVFABI. 
11406 static std::tuple<unsigned, unsigned, bool> 11407 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11408 QualType RetType = FD->getReturnType().getCanonicalType(); 11409 11410 ASTContext &C = FD->getASTContext(); 11411 11412 bool OutputBecomesInput = false; 11413 11414 llvm::SmallVector<unsigned, 8> Sizes; 11415 if (!RetType->isVoidType()) { 11416 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11417 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11418 OutputBecomesInput = true; 11419 } 11420 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11421 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11422 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11423 } 11424 11425 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11426 // The LS of a function parameter / return value can only be a power 11427 // of 2, starting from 8 bits, up to 128. 11428 assert(std::all_of(Sizes.begin(), Sizes.end(), 11429 [](unsigned Size) { 11430 return Size == 8 || Size == 16 || Size == 32 || 11431 Size == 64 || Size == 128; 11432 }) && 11433 "Invalid size"); 11434 11435 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11436 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11437 OutputBecomesInput); 11438 } 11439 11440 /// Mangle the parameter part of the vector function name according to 11441 /// their OpenMP classification. The mangling function is defined in 11442 /// section 3.5 of the AAVFABI. 11443 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11444 SmallString<256> Buffer; 11445 llvm::raw_svector_ostream Out(Buffer); 11446 for (const auto &ParamAttr : ParamAttrs) { 11447 switch (ParamAttr.Kind) { 11448 case LinearWithVarStride: 11449 Out << "ls" << ParamAttr.StrideOrArg; 11450 break; 11451 case Linear: 11452 Out << 'l'; 11453 // Don't print the step value if it is not present or if it is 11454 // equal to 1. 
11455 if (ParamAttr.StrideOrArg != 1) 11456 Out << ParamAttr.StrideOrArg; 11457 break; 11458 case Uniform: 11459 Out << 'u'; 11460 break; 11461 case Vector: 11462 Out << 'v'; 11463 break; 11464 } 11465 11466 if (!!ParamAttr.Alignment) 11467 Out << 'a' << ParamAttr.Alignment; 11468 } 11469 11470 return std::string(Out.str()); 11471 } 11472 11473 // Function used to add the attribute. The parameter `VLEN` is 11474 // templated to allow the use of "x" when targeting scalable functions 11475 // for SVE. 11476 template <typename T> 11477 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11478 char ISA, StringRef ParSeq, 11479 StringRef MangledName, bool OutputBecomesInput, 11480 llvm::Function *Fn) { 11481 SmallString<256> Buffer; 11482 llvm::raw_svector_ostream Out(Buffer); 11483 Out << Prefix << ISA << LMask << VLEN; 11484 if (OutputBecomesInput) 11485 Out << "v"; 11486 Out << ParSeq << "_" << MangledName; 11487 Fn->addFnAttr(Out.str()); 11488 } 11489 11490 // Helper function to generate the Advanced SIMD names depending on 11491 // the value of the NDS when simdlen is not present. 
11492 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11493 StringRef Prefix, char ISA, 11494 StringRef ParSeq, StringRef MangledName, 11495 bool OutputBecomesInput, 11496 llvm::Function *Fn) { 11497 switch (NDS) { 11498 case 8: 11499 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11500 OutputBecomesInput, Fn); 11501 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11502 OutputBecomesInput, Fn); 11503 break; 11504 case 16: 11505 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11506 OutputBecomesInput, Fn); 11507 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11508 OutputBecomesInput, Fn); 11509 break; 11510 case 32: 11511 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11512 OutputBecomesInput, Fn); 11513 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11514 OutputBecomesInput, Fn); 11515 break; 11516 case 64: 11517 case 128: 11518 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11519 OutputBecomesInput, Fn); 11520 break; 11521 default: 11522 llvm_unreachable("Scalar type is too wide."); 11523 } 11524 } 11525 11526 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11527 static void emitAArch64DeclareSimdFunction( 11528 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11529 ArrayRef<ParamAttrTy> ParamAttrs, 11530 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11531 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11532 11533 // Get basic data for building the vector signature. 11534 const auto Data = getNDSWDS(FD, ParamAttrs); 11535 const unsigned NDS = std::get<0>(Data); 11536 const unsigned WDS = std::get<1>(Data); 11537 const bool OutputBecomesInput = std::get<2>(Data); 11538 11539 // Check the values provided via `simdlen` by the user. 11540 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11541 if (UserVLEN == 1) { 11542 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11543 DiagnosticsEngine::Warning, 11544 "The clause simdlen(1) has no effect when targeting aarch64."); 11545 CGM.getDiags().Report(SLoc, DiagID); 11546 return; 11547 } 11548 11549 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11550 // Advanced SIMD output. 11551 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11552 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11553 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11554 "power of 2 when targeting Advanced SIMD."); 11555 CGM.getDiags().Report(SLoc, DiagID); 11556 return; 11557 } 11558 11559 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11560 // limits. 11561 if (ISA == 's' && UserVLEN != 0) { 11562 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11563 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11564 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11565 "lanes in the architectural constraints " 11566 "for SVE (min is 128-bit, max is " 11567 "2048-bit, by steps of 128-bit)"); 11568 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11569 return; 11570 } 11571 } 11572 11573 // Sort out parameter sequence. 11574 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11575 StringRef Prefix = "_ZGV"; 11576 // Generate simdlen from user input (if any). 11577 if (UserVLEN) { 11578 if (ISA == 's') { 11579 // SVE generates only a masked function. 11580 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11581 OutputBecomesInput, Fn); 11582 } else { 11583 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11584 // Advanced SIMD generates one or two functions, depending on 11585 // the `[not]inbranch` clause. 
11586 switch (State) { 11587 case OMPDeclareSimdDeclAttr::BS_Undefined: 11588 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11589 OutputBecomesInput, Fn); 11590 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11591 OutputBecomesInput, Fn); 11592 break; 11593 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11594 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11595 OutputBecomesInput, Fn); 11596 break; 11597 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11598 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11599 OutputBecomesInput, Fn); 11600 break; 11601 } 11602 } 11603 } else { 11604 // If no user simdlen is provided, follow the AAVFABI rules for 11605 // generating the vector length. 11606 if (ISA == 's') { 11607 // SVE, section 3.4.1, item 1. 11608 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11609 OutputBecomesInput, Fn); 11610 } else { 11611 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11612 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11613 // two vector names depending on the use of the clause 11614 // `[not]inbranch`. 
11615 switch (State) { 11616 case OMPDeclareSimdDeclAttr::BS_Undefined: 11617 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11618 OutputBecomesInput, Fn); 11619 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11620 OutputBecomesInput, Fn); 11621 break; 11622 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11623 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11624 OutputBecomesInput, Fn); 11625 break; 11626 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11627 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11628 OutputBecomesInput, Fn); 11629 break; 11630 } 11631 } 11632 } 11633 } 11634 11635 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11636 llvm::Function *Fn) { 11637 ASTContext &C = CGM.getContext(); 11638 FD = FD->getMostRecentDecl(); 11639 // Map params to their positions in function decl. 11640 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11641 if (isa<CXXMethodDecl>(FD)) 11642 ParamPositions.try_emplace(FD, 0); 11643 unsigned ParamPos = ParamPositions.size(); 11644 for (const ParmVarDecl *P : FD->parameters()) { 11645 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11646 ++ParamPos; 11647 } 11648 while (FD) { 11649 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11650 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11651 // Mark uniform parameters. 11652 for (const Expr *E : Attr->uniforms()) { 11653 E = E->IgnoreParenImpCasts(); 11654 unsigned Pos; 11655 if (isa<CXXThisExpr>(E)) { 11656 Pos = ParamPositions[FD]; 11657 } else { 11658 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11659 ->getCanonicalDecl(); 11660 Pos = ParamPositions[PVD]; 11661 } 11662 ParamAttrs[Pos].Kind = Uniform; 11663 } 11664 // Get alignment info. 
11665 auto NI = Attr->alignments_begin(); 11666 for (const Expr *E : Attr->aligneds()) { 11667 E = E->IgnoreParenImpCasts(); 11668 unsigned Pos; 11669 QualType ParmTy; 11670 if (isa<CXXThisExpr>(E)) { 11671 Pos = ParamPositions[FD]; 11672 ParmTy = E->getType(); 11673 } else { 11674 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11675 ->getCanonicalDecl(); 11676 Pos = ParamPositions[PVD]; 11677 ParmTy = PVD->getType(); 11678 } 11679 ParamAttrs[Pos].Alignment = 11680 (*NI) 11681 ? (*NI)->EvaluateKnownConstInt(C) 11682 : llvm::APSInt::getUnsigned( 11683 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11684 .getQuantity()); 11685 ++NI; 11686 } 11687 // Mark linear parameters. 11688 auto SI = Attr->steps_begin(); 11689 auto MI = Attr->modifiers_begin(); 11690 for (const Expr *E : Attr->linears()) { 11691 E = E->IgnoreParenImpCasts(); 11692 unsigned Pos; 11693 // Rescaling factor needed to compute the linear parameter 11694 // value in the mangled name. 11695 unsigned PtrRescalingFactor = 1; 11696 if (isa<CXXThisExpr>(E)) { 11697 Pos = ParamPositions[FD]; 11698 } else { 11699 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11700 ->getCanonicalDecl(); 11701 Pos = ParamPositions[PVD]; 11702 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11703 PtrRescalingFactor = CGM.getContext() 11704 .getTypeSizeInChars(P->getPointeeType()) 11705 .getQuantity(); 11706 } 11707 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11708 ParamAttr.Kind = Linear; 11709 // Assuming a stride of 1, for `linear` without modifiers. 
11710 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11711 if (*SI) { 11712 Expr::EvalResult Result; 11713 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11714 if (const auto *DRE = 11715 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11716 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11717 ParamAttr.Kind = LinearWithVarStride; 11718 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11719 ParamPositions[StridePVD->getCanonicalDecl()]); 11720 } 11721 } 11722 } else { 11723 ParamAttr.StrideOrArg = Result.Val.getInt(); 11724 } 11725 } 11726 // If we are using a linear clause on a pointer, we need to 11727 // rescale the value of linear_step with the byte size of the 11728 // pointee type. 11729 if (Linear == ParamAttr.Kind) 11730 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11731 ++SI; 11732 ++MI; 11733 } 11734 llvm::APSInt VLENVal; 11735 SourceLocation ExprLoc; 11736 const Expr *VLENExpr = Attr->getSimdlen(); 11737 if (VLENExpr) { 11738 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11739 ExprLoc = VLENExpr->getExprLoc(); 11740 } 11741 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11742 if (CGM.getTriple().isX86()) { 11743 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11744 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11745 unsigned VLEN = VLENVal.getExtValue(); 11746 StringRef MangledName = Fn->getName(); 11747 if (CGM.getTarget().hasFeature("sve")) 11748 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11749 MangledName, 's', 128, Fn, ExprLoc); 11750 if (CGM.getTarget().hasFeature("neon")) 11751 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11752 MangledName, 'n', 128, Fn, ExprLoc); 11753 } 11754 } 11755 FD = FD->getPreviousDecl(); 11756 } 11757 } 11758 11759 namespace { 11760 /// Cleanup action for doacross support. 
/// EH-scope cleanup that emits the __kmpc_doacross_fini call when the loop
/// region is exited (normally or via exception).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // The (ident_t *, gtid) pair captured at push time.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the __kmpc_doacross_init call (and schedule the matching
/// __kmpc_doacross_fini cleanup) for a loop with cross-iteration dependences.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // kmp_dim was already built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim per loop dimension, zero-initialized (lower bound stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  // The fini call is emitted via the cleanup stack so it also runs on EH
  // unwind paths.
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink: ...)') with the loop-iteration vector from clause \p C.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop counter, widened to kmp_int64, into the temp array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at \p Loc, using the nounwind fast path when the
/// callee function is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation
    Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Thin wrapper over emitCall; target-specific runtimes may override this.
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record whether a 'declare target' function body is being emitted; used
/// later when checking 'requires' directives.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Host default: the native parameter is used directly, so its local address
/// is the parameter address (device runtimes override this mapping).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the address to use for local variable \p VD, taking into account
/// untied-task privatization and the OpenMP 'allocate' directive (in which
/// case storage is obtained from __kmpc_alloc and released via a cleanup).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If we are inside an untied task, the variable may have been given
  // task-persistent storage; pick up those addresses if present.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw source-location encoding, decoded again in Emit().
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
12002 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12003 CGF.getContext().VoidPtrTy, 12004 Allocator->getExprLoc()); 12005 Args[2] = AllocVal; 12006 12007 CGF.EmitRuntimeCall(RTLFn, Args); 12008 } 12009 }; 12010 Address VDAddr = 12011 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 12012 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12013 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12014 VDAddr, AA->getAllocator()); 12015 if (UntiedRealAddr.isValid()) 12016 if (auto *Region = 12017 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12018 Region->emitUntiedSwitch(CGF); 12019 return VDAddr; 12020 } 12021 return UntiedAddr; 12022 } 12023 12024 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12025 const VarDecl *VD) const { 12026 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12027 if (It == FunctionToUntiedTaskStackMap.end()) 12028 return false; 12029 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12030 } 12031 12032 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12033 CodeGenModule &CGM, const OMPLoopDirective &S) 12034 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12035 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12036 if (!NeedToPush) 12037 return; 12038 NontemporalDeclsSet &DS = 12039 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12040 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12041 for (const Stmt *Ref : C->private_refs()) { 12042 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12043 const ValueDecl *VD; 12044 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12045 VD = DRE->getDecl(); 12046 } else { 12047 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12048 assert((ME->isImplicitCXXThis() || 12049 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12050 "Expected member of current class."); 12051 VD = 
ME->getMemberDecl(); 12052 } 12053 DS.insert(VD); 12054 } 12055 } 12056 } 12057 12058 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12059 if (!NeedToPush) 12060 return; 12061 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12062 } 12063 12064 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12065 CodeGenFunction &CGF, 12066 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12067 std::pair<Address, Address>> &LocalVars) 12068 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12069 if (!NeedToPush) 12070 return; 12071 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12072 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12073 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12074 } 12075 12076 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12077 if (!NeedToPush) 12078 return; 12079 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12080 } 12081 12082 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12083 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12084 12085 return llvm::any_of( 12086 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12087 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12088 } 12089 12090 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12091 const OMPExecutableDirective &S, 12092 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12093 const { 12094 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12095 // Vars in target/task regions must be excluded completely. 
12096 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12097 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12098 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12099 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12100 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12101 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12102 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12103 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12104 } 12105 } 12106 // Exclude vars in private clauses. 12107 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12108 for (const Expr *Ref : C->varlists()) { 12109 if (!Ref->getType()->isScalarType()) 12110 continue; 12111 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12112 if (!DRE) 12113 continue; 12114 NeedToCheckForLPCs.insert(DRE->getDecl()); 12115 } 12116 } 12117 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12118 for (const Expr *Ref : C->varlists()) { 12119 if (!Ref->getType()->isScalarType()) 12120 continue; 12121 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12122 if (!DRE) 12123 continue; 12124 NeedToCheckForLPCs.insert(DRE->getDecl()); 12125 } 12126 } 12127 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12128 for (const Expr *Ref : C->varlists()) { 12129 if (!Ref->getType()->isScalarType()) 12130 continue; 12131 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12132 if (!DRE) 12133 continue; 12134 NeedToCheckForLPCs.insert(DRE->getDecl()); 12135 } 12136 } 12137 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12138 for (const Expr *Ref : C->varlists()) { 12139 if (!Ref->getType()->isScalarType()) 12140 continue; 12141 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12142 if (!DRE) 12143 continue; 12144 NeedToCheckForLPCs.insert(DRE->getDecl()); 12145 } 12146 } 12147 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12148 for (const Expr *Ref : C->varlists()) { 12149 if (!Ref->getType()->isScalarType()) 12150 continue; 12151 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12152 if (!DRE) 12153 continue; 12154 NeedToCheckForLPCs.insert(DRE->getDecl()); 12155 } 12156 } 12157 for (const Decl *VD : NeedToCheckForLPCs) { 12158 for (const LastprivateConditionalData &Data : 12159 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12160 if (Data.DeclToUniqueName.count(VD) > 0) { 12161 if (!Data.Disabled) 12162 NeedToAddForLPCsAsDisabled.insert(VD); 12163 break; 12164 } 12165 } 12166 } 12167 } 12168 12169 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12170 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12171 : CGM(CGF.CGM), 12172 Action((CGM.getLangOpts().OpenMP >= 50 && 12173 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12174 [](const OMPLastprivateClause *C) { 12175 return C->getKind() == 12176 OMPC_LASTPRIVATE_conditional; 12177 })) 12178 ? 
ActionToDo::PushAsLastprivateConditional 12179 : ActionToDo::DoNotPush) { 12180 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12181 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12182 return; 12183 assert(Action == ActionToDo::PushAsLastprivateConditional && 12184 "Expected a push action."); 12185 LastprivateConditionalData &Data = 12186 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12187 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12188 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12189 continue; 12190 12191 for (const Expr *Ref : C->varlists()) { 12192 Data.DeclToUniqueName.insert(std::make_pair( 12193 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12194 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12195 } 12196 } 12197 Data.IVLVal = IVLVal; 12198 Data.Fn = CGF.CurFn; 12199 } 12200 12201 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12202 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12203 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12204 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12205 if (CGM.getLangOpts().OpenMP < 50) 12206 return; 12207 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12208 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12209 if (!NeedToAddForLPCsAsDisabled.empty()) { 12210 Action = ActionToDo::DisableLastprivateConditional; 12211 LastprivateConditionalData &Data = 12212 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12213 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12214 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12215 Data.Fn = CGF.CurFn; 12216 Data.Disabled = true; 12217 } 12218 } 12219 12220 CGOpenMPRuntime::LastprivateConditionalRAII 12221 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12222 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12223 return 
LastprivateConditionalRAII(CGF, S); 12224 } 12225 12226 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12227 if (CGM.getLangOpts().OpenMP < 50) 12228 return; 12229 if (Action == ActionToDo::DisableLastprivateConditional) { 12230 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12231 "Expected list of disabled private vars."); 12232 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12233 } 12234 if (Action == ActionToDo::PushAsLastprivateConditional) { 12235 assert( 12236 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12237 "Expected list of lastprivate conditional vars."); 12238 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12239 } 12240 } 12241 12242 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12243 const VarDecl *VD) { 12244 ASTContext &C = CGM.getContext(); 12245 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12246 if (I == LastprivateConditionalToTypes.end()) 12247 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12248 QualType NewType; 12249 const FieldDecl *VDField; 12250 const FieldDecl *FiredField; 12251 LValue BaseLVal; 12252 auto VI = I->getSecond().find(VD); 12253 if (VI == I->getSecond().end()) { 12254 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12255 RD->startDefinition(); 12256 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12257 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12258 RD->completeDefinition(); 12259 NewType = C.getRecordType(RD); 12260 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12261 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12262 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12263 } else { 12264 NewType = std::get<0>(VI->getSecond()); 12265 VDField = std::get<1>(VI->getSecond()); 12266 FiredField = std::get<2>(VI->getSecond()); 
12267 BaseLVal = std::get<3>(VI->getSecond()); 12268 } 12269 LValue FiredLVal = 12270 CGF.EmitLValueForField(BaseLVal, FiredField); 12271 CGF.EmitStoreOfScalar( 12272 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12273 FiredLVal); 12274 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12275 } 12276 12277 namespace { 12278 /// Checks if the lastprivate conditional variable is referenced in LHS. 12279 class LastprivateConditionalRefChecker final 12280 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12281 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12282 const Expr *FoundE = nullptr; 12283 const Decl *FoundD = nullptr; 12284 StringRef UniqueDeclName; 12285 LValue IVLVal; 12286 llvm::Function *FoundFn = nullptr; 12287 SourceLocation Loc; 12288 12289 public: 12290 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12291 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12292 llvm::reverse(LPM)) { 12293 auto It = D.DeclToUniqueName.find(E->getDecl()); 12294 if (It == D.DeclToUniqueName.end()) 12295 continue; 12296 if (D.Disabled) 12297 return false; 12298 FoundE = E; 12299 FoundD = E->getDecl()->getCanonicalDecl(); 12300 UniqueDeclName = It->second; 12301 IVLVal = D.IVLVal; 12302 FoundFn = D.Fn; 12303 break; 12304 } 12305 return FoundE == E; 12306 } 12307 bool VisitMemberExpr(const MemberExpr *E) { 12308 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12309 return false; 12310 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12311 llvm::reverse(LPM)) { 12312 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12313 if (It == D.DeclToUniqueName.end()) 12314 continue; 12315 if (D.Disabled) 12316 return false; 12317 FoundE = E; 12318 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12319 UniqueDeclName = It->second; 12320 IVLVal = D.IVLVal; 12321 FoundFn = D.Fn; 12322 break; 12323 } 12324 return FoundE == E; 12325 } 12326 bool VisitStmt(const Stmt *S) { 12327 for (const Stmt 
*Child : S->children()) { 12328 if (!Child) 12329 continue; 12330 if (const auto *E = dyn_cast<Expr>(Child)) 12331 if (!E->isGLValue()) 12332 continue; 12333 if (Visit(Child)) 12334 return true; 12335 } 12336 return false; 12337 } 12338 explicit LastprivateConditionalRefChecker( 12339 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12340 : LPM(LPM) {} 12341 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12342 getFoundData() const { 12343 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12344 } 12345 }; 12346 } // namespace 12347 12348 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12349 LValue IVLVal, 12350 StringRef UniqueDeclName, 12351 LValue LVal, 12352 SourceLocation Loc) { 12353 // Last updated loop counter for the lastprivate conditional var. 12354 // int<xx> last_iv = 0; 12355 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12356 llvm::Constant *LastIV = 12357 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12358 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12359 IVLVal.getAlignment().getAsAlign()); 12360 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12361 12362 // Last value of the lastprivate conditional. 12363 // decltype(priv_a) last_a; 12364 llvm::Constant *Last = getOrCreateInternalVariable( 12365 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12366 cast<llvm::GlobalVariable>(Last)->setAlignment( 12367 LVal.getAlignment().getAsAlign()); 12368 LValue LastLVal = 12369 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12370 12371 // Global loop counter. Required to handle inner parallel-for regions. 
12372 // iv 12373 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12374 12375 // #pragma omp critical(a) 12376 // if (last_iv <= iv) { 12377 // last_iv = iv; 12378 // last_a = priv_a; 12379 // } 12380 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12381 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12382 Action.Enter(CGF); 12383 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12384 // (last_iv <= iv) ? Check if the variable is updated and store new 12385 // value in global var. 12386 llvm::Value *CmpRes; 12387 if (IVLVal.getType()->isSignedIntegerType()) { 12388 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12389 } else { 12390 assert(IVLVal.getType()->isUnsignedIntegerType() && 12391 "Loop iteration variable must be integer."); 12392 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12393 } 12394 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12395 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12396 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12397 // { 12398 CGF.EmitBlock(ThenBB); 12399 12400 // last_iv = iv; 12401 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12402 12403 // last_a = priv_a; 12404 switch (CGF.getEvaluationKind(LVal.getType())) { 12405 case TEK_Scalar: { 12406 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12407 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12408 break; 12409 } 12410 case TEK_Complex: { 12411 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12412 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12413 break; 12414 } 12415 case TEK_Aggregate: 12416 llvm_unreachable( 12417 "Aggregates are not supported in lastprivate conditional."); 12418 } 12419 // } 12420 CGF.EmitBranch(ExitBB); 12421 // There is no need to emit line number for unconditional branch. 
12422 (void)ApplyDebugLocation::CreateEmpty(CGF); 12423 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12424 }; 12425 12426 if (CGM.getLangOpts().OpenMPSimd) { 12427 // Do not emit as a critical region as no parallel region could be emitted. 12428 RegionCodeGenTy ThenRCG(CodeGen); 12429 ThenRCG(CGF); 12430 } else { 12431 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12432 } 12433 } 12434 12435 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12436 const Expr *LHS) { 12437 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12438 return; 12439 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12440 if (!Checker.Visit(LHS)) 12441 return; 12442 const Expr *FoundE; 12443 const Decl *FoundD; 12444 StringRef UniqueDeclName; 12445 LValue IVLVal; 12446 llvm::Function *FoundFn; 12447 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12448 Checker.getFoundData(); 12449 if (FoundFn != CGF.CurFn) { 12450 // Special codegen for inner parallel regions. 
12451 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12452 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12453 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12454 "Lastprivate conditional is not found in outer region."); 12455 QualType StructTy = std::get<0>(It->getSecond()); 12456 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12457 LValue PrivLVal = CGF.EmitLValue(FoundE); 12458 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12459 PrivLVal.getAddress(CGF), 12460 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12461 LValue BaseLVal = 12462 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12463 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12464 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12465 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12466 FiredLVal, llvm::AtomicOrdering::Unordered, 12467 /*IsVolatile=*/true, /*isInit=*/false); 12468 return; 12469 } 12470 12471 // Private address of the lastprivate conditional in the current context. 
12472 // priv_a 12473 LValue LVal = CGF.EmitLValue(FoundE); 12474 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12475 FoundE->getExprLoc()); 12476 } 12477 12478 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12479 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12480 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12481 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12482 return; 12483 auto Range = llvm::reverse(LastprivateConditionalStack); 12484 auto It = llvm::find_if( 12485 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12486 if (It == Range.end() || It->Fn != CGF.CurFn) 12487 return; 12488 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12489 assert(LPCI != LastprivateConditionalToTypes.end() && 12490 "Lastprivates must be registered already."); 12491 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12492 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12493 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12494 for (const auto &Pair : It->DeclToUniqueName) { 12495 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12496 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12497 continue; 12498 auto I = LPCI->getSecond().find(Pair.first); 12499 assert(I != LPCI->getSecond().end() && 12500 "Lastprivate must be rehistered already."); 12501 // bool Cmp = priv_a.Fired != 0; 12502 LValue BaseLVal = std::get<3>(I->getSecond()); 12503 LValue FiredLVal = 12504 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12505 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12506 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12507 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12508 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12509 // if (Cmp) { 12510 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12511 CGF.EmitBlock(ThenBB); 
12512 Address Addr = CGF.GetAddrOfLocalVar(VD); 12513 LValue LVal; 12514 if (VD->getType()->isReferenceType()) 12515 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12516 AlignmentSource::Decl); 12517 else 12518 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12519 AlignmentSource::Decl); 12520 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12521 D.getBeginLoc()); 12522 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12523 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12524 // } 12525 } 12526 } 12527 12528 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12529 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12530 SourceLocation Loc) { 12531 if (CGF.getLangOpts().OpenMP < 50) 12532 return; 12533 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12534 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12535 "Unknown lastprivate conditional variable."); 12536 StringRef UniqueName = It->second; 12537 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12538 // The variable was not updated in the region - exit. 
12539 if (!GV) 12540 return; 12541 LValue LPLVal = CGF.MakeAddrLValue( 12542 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12543 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12544 CGF.EmitStoreOfScalar(Res, PrivLVal); 12545 } 12546 12547 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12548 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12549 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12550 llvm_unreachable("Not supported in SIMD-only mode"); 12551 } 12552 12553 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12554 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12555 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12556 llvm_unreachable("Not supported in SIMD-only mode"); 12557 } 12558 12559 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12560 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12561 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12562 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12563 bool Tied, unsigned &NumberOfParts) { 12564 llvm_unreachable("Not supported in SIMD-only mode"); 12565 } 12566 12567 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12568 SourceLocation Loc, 12569 llvm::Function *OutlinedFn, 12570 ArrayRef<llvm::Value *> CapturedVars, 12571 const Expr *IfCond) { 12572 llvm_unreachable("Not supported in SIMD-only mode"); 12573 } 12574 12575 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12576 CodeGenFunction &CGF, StringRef CriticalName, 12577 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12578 const Expr *Hint) { 12579 llvm_unreachable("Not supported in SIMD-only mode"); 12580 } 12581 12582 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12583 const RegionCodeGenTy &MasterOpGen, 12584 SourceLocation Loc) { 12585 llvm_unreachable("Not supported in SIMD-only mode"); 12586 } 12587 12588 void 
CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12589 const RegionCodeGenTy &MasterOpGen, 12590 SourceLocation Loc, 12591 const Expr *Filter) { 12592 llvm_unreachable("Not supported in SIMD-only mode"); 12593 } 12594 12595 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12596 SourceLocation Loc) { 12597 llvm_unreachable("Not supported in SIMD-only mode"); 12598 } 12599 12600 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12601 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12602 SourceLocation Loc) { 12603 llvm_unreachable("Not supported in SIMD-only mode"); 12604 } 12605 12606 void CGOpenMPSIMDRuntime::emitSingleRegion( 12607 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12608 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12609 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12610 ArrayRef<const Expr *> AssignmentOps) { 12611 llvm_unreachable("Not supported in SIMD-only mode"); 12612 } 12613 12614 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12615 const RegionCodeGenTy &OrderedOpGen, 12616 SourceLocation Loc, 12617 bool IsThreads) { 12618 llvm_unreachable("Not supported in SIMD-only mode"); 12619 } 12620 12621 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12622 SourceLocation Loc, 12623 OpenMPDirectiveKind Kind, 12624 bool EmitChecks, 12625 bool ForceSimpleCall) { 12626 llvm_unreachable("Not supported in SIMD-only mode"); 12627 } 12628 12629 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12630 CodeGenFunction &CGF, SourceLocation Loc, 12631 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12632 bool Ordered, const DispatchRTInput &DispatchValues) { 12633 llvm_unreachable("Not supported in SIMD-only mode"); 12634 } 12635 12636 void CGOpenMPSIMDRuntime::emitForStaticInit( 12637 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12638 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 
12639 llvm_unreachable("Not supported in SIMD-only mode"); 12640 } 12641 12642 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12643 CodeGenFunction &CGF, SourceLocation Loc, 12644 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12645 llvm_unreachable("Not supported in SIMD-only mode"); 12646 } 12647 12648 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12649 SourceLocation Loc, 12650 unsigned IVSize, 12651 bool IVSigned) { 12652 llvm_unreachable("Not supported in SIMD-only mode"); 12653 } 12654 12655 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12656 SourceLocation Loc, 12657 OpenMPDirectiveKind DKind) { 12658 llvm_unreachable("Not supported in SIMD-only mode"); 12659 } 12660 12661 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12662 SourceLocation Loc, 12663 unsigned IVSize, bool IVSigned, 12664 Address IL, Address LB, 12665 Address UB, Address ST) { 12666 llvm_unreachable("Not supported in SIMD-only mode"); 12667 } 12668 12669 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12670 llvm::Value *NumThreads, 12671 SourceLocation Loc) { 12672 llvm_unreachable("Not supported in SIMD-only mode"); 12673 } 12674 12675 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12676 ProcBindKind ProcBind, 12677 SourceLocation Loc) { 12678 llvm_unreachable("Not supported in SIMD-only mode"); 12679 } 12680 12681 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12682 const VarDecl *VD, 12683 Address VDAddr, 12684 SourceLocation Loc) { 12685 llvm_unreachable("Not supported in SIMD-only mode"); 12686 } 12687 12688 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12689 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12690 CodeGenFunction *CGF) { 12691 llvm_unreachable("Not supported in SIMD-only mode"); 12692 } 12693 12694 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 
12695 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12696 llvm_unreachable("Not supported in SIMD-only mode"); 12697 } 12698 12699 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12700 ArrayRef<const Expr *> Vars, 12701 SourceLocation Loc, 12702 llvm::AtomicOrdering AO) { 12703 llvm_unreachable("Not supported in SIMD-only mode"); 12704 } 12705 12706 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12707 const OMPExecutableDirective &D, 12708 llvm::Function *TaskFunction, 12709 QualType SharedsTy, Address Shareds, 12710 const Expr *IfCond, 12711 const OMPTaskDataTy &Data) { 12712 llvm_unreachable("Not supported in SIMD-only mode"); 12713 } 12714 12715 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12716 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12717 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12718 const Expr *IfCond, const OMPTaskDataTy &Data) { 12719 llvm_unreachable("Not supported in SIMD-only mode"); 12720 } 12721 12722 void CGOpenMPSIMDRuntime::emitReduction( 12723 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12724 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12725 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12726 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12727 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12728 ReductionOps, Options); 12729 } 12730 12731 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12732 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12733 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12734 llvm_unreachable("Not supported in SIMD-only mode"); 12735 } 12736 12737 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12738 SourceLocation Loc, 12739 bool IsWorksharingReduction) { 12740 llvm_unreachable("Not supported in SIMD-only mode"); 12741 } 
12742 12743 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12744 SourceLocation Loc, 12745 ReductionCodeGen &RCG, 12746 unsigned N) { 12747 llvm_unreachable("Not supported in SIMD-only mode"); 12748 } 12749 12750 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12751 SourceLocation Loc, 12752 llvm::Value *ReductionsPtr, 12753 LValue SharedLVal) { 12754 llvm_unreachable("Not supported in SIMD-only mode"); 12755 } 12756 12757 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12758 SourceLocation Loc) { 12759 llvm_unreachable("Not supported in SIMD-only mode"); 12760 } 12761 12762 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12763 CodeGenFunction &CGF, SourceLocation Loc, 12764 OpenMPDirectiveKind CancelRegion) { 12765 llvm_unreachable("Not supported in SIMD-only mode"); 12766 } 12767 12768 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12769 SourceLocation Loc, const Expr *IfCond, 12770 OpenMPDirectiveKind CancelRegion) { 12771 llvm_unreachable("Not supported in SIMD-only mode"); 12772 } 12773 12774 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12775 const OMPExecutableDirective &D, StringRef ParentName, 12776 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12777 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12778 llvm_unreachable("Not supported in SIMD-only mode"); 12779 } 12780 12781 void CGOpenMPSIMDRuntime::emitTargetCall( 12782 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12783 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12784 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12785 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12786 const OMPLoopDirective &D)> 12787 SizeEmitter) { 12788 llvm_unreachable("Not supported in SIMD-only mode"); 12789 } 12790 12791 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12792 llvm_unreachable("Not supported in SIMD-only 
mode"); 12793 } 12794 12795 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12796 llvm_unreachable("Not supported in SIMD-only mode"); 12797 } 12798 12799 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12800 return false; 12801 } 12802 12803 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12804 const OMPExecutableDirective &D, 12805 SourceLocation Loc, 12806 llvm::Function *OutlinedFn, 12807 ArrayRef<llvm::Value *> CapturedVars) { 12808 llvm_unreachable("Not supported in SIMD-only mode"); 12809 } 12810 12811 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12812 const Expr *NumTeams, 12813 const Expr *ThreadLimit, 12814 SourceLocation Loc) { 12815 llvm_unreachable("Not supported in SIMD-only mode"); 12816 } 12817 12818 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12819 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12820 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12821 llvm_unreachable("Not supported in SIMD-only mode"); 12822 } 12823 12824 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12825 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12826 const Expr *Device) { 12827 llvm_unreachable("Not supported in SIMD-only mode"); 12828 } 12829 12830 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12831 const OMPLoopDirective &D, 12832 ArrayRef<Expr *> NumIterations) { 12833 llvm_unreachable("Not supported in SIMD-only mode"); 12834 } 12835 12836 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12837 const OMPDependClause *C) { 12838 llvm_unreachable("Not supported in SIMD-only mode"); 12839 } 12840 12841 const VarDecl * 12842 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12843 const VarDecl *NativeParam) const { 12844 llvm_unreachable("Not supported in SIMD-only mode"); 12845 } 12846 12847 Address 12848 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction 
&CGF, 12849 const VarDecl *NativeParam, 12850 const VarDecl *TargetParam) const { 12851 llvm_unreachable("Not supported in SIMD-only mode"); 12852 } 12853