//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a CapturedStmt (outlined constructs).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no associated CapturedStmt (inlined constructs).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks. Default is a no-op;
  /// overridden by task regions (and forwarded by inlined regions).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this OpenMP region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Directive that created this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether a 'cancel' directive may appear inside this region.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery needed to resume an
  /// untied task at the correct part after it has been rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor receives "Tied").
    bool Untied;
    /// Variable holding the task part id used to select the resume point.
    const VarDecl *PartIDVar;
    /// Extra code emitted at each switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part 0 is the task entry: jump to the first emitted block.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id, run the extra
    /// codegen, branch out through the return block and register the
    /// continuation block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action associated with this region.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task switching to the enclosing region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same as OldCSI if it is itself an OpenMP region, null otherwise.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique client-provided name of the target region.
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, restored in the destructor when
  // NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, temporarily clear the lambda/block capture
  /// state so the inlined region does not inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the post-action when the cleanup fires (skipped if codegen has no
  /// insertion point, i.e. the current block is unreachable).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the stored codegen callback, wrapping it in a cleanups scope and, if a
/// pre/post action is attached, scheduling its Exit as an EH-safe cleanup.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initializer of a reduction item declared via
/// 'declare reduction'. If the UDR has an explicit initializer expression,
/// emit it with the private/original addresses bound as the initializer's
/// LHS/RHS; otherwise initialize the private copy from a zero-initialized
/// global constant of the reduction type.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's formal LHS/RHS variables onto the private copy
    // and the original shared item, then emit the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a null constant of the type and
    // copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true if the initializer comes from a
/// 'declare reduction' construct, false if it is the private decl's own init.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl used for initialization, may be null.
/// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 680 QualType Type, bool EmitDeclareReductionInit, 681 const Expr *Init, 682 const OMPDeclareReductionDecl *DRD, 683 Address SrcAddr = Address::invalid()) { 684 // Perform element-by-element initialization. 685 QualType ElementTy; 686 687 // Drill down to the base element type on both arrays. 688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 690 DestAddr = 691 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 692 if (DRD) 693 SrcAddr = 694 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 695 696 llvm::Value *SrcBegin = nullptr; 697 if (DRD) 698 SrcBegin = SrcAddr.getPointer(); 699 llvm::Value *DestBegin = DestAddr.getPointer(); 700 // Cast from pointer to array type to pointer to single element. 701 llvm::Value *DestEnd = 702 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); 703 // The basic structure here is a while-do loop. 704 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 705 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 706 llvm::Value *IsEmpty = 707 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 708 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 709 710 // Enter the loop body, making that address the current address. 
711 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 712 CGF.EmitBlock(BodyBB); 713 714 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 715 716 llvm::PHINode *SrcElementPHI = nullptr; 717 Address SrcElementCurrent = Address::invalid(); 718 if (DRD) { 719 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 720 "omp.arraycpy.srcElementPast"); 721 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 722 SrcElementCurrent = 723 Address(SrcElementPHI, 724 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 725 } 726 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 727 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 728 DestElementPHI->addIncoming(DestBegin, EntryBB); 729 Address DestElementCurrent = 730 Address(DestElementPHI, 731 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 732 733 // Emit copy. 734 { 735 CodeGenFunction::RunCleanupsScope InitScope(CGF); 736 if (EmitDeclareReductionInit) { 737 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 738 SrcElementCurrent, ElementTy); 739 } else 740 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 741 /*IsInitializer=*/false); 742 } 743 744 if (DRD) { 745 // Shift the address forward by one element. 746 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 747 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, 748 "omp.arraycpy.dest.element"); 749 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 750 } 751 752 // Shift the address forward by one element. 753 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 754 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, 755 "omp.arraycpy.dest.element"); 756 // Check whether we've reached the end. 
  // Loop back unless the element just copied was the last one.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue for the shared (original) copy of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue for an array-section reduction item.
/// For anything that is not an OMPArraySectionExpr an empty LValue is
/// returned (there is no separate upper bound to track).
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit initialization of the privatized copy of an aggregate reduction
/// item \p N, either via the declare-reduction initializer or the private
/// variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one exists, or when the
  // private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

// Records per-clause data for each reduction item. Shareds/Origs/Privates/
// ReductionOps are parallel arrays and are iterated in lockstep.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit the shared and original lvalues for reduction item \p N and append
/// them to SharedAddresses/OrigAddresses. Items must be emitted in order:
/// item N is appended only when exactly N entries already exist.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // Shared expression and clause reference are the same - reuse lvalues.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

/// Compute and record the size (in chars and in elements) of reduction item
/// \p N. For variably modified types the size is computed at runtime and the
/// VLA size expression is bound so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size in chars is known statically, no element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Number of elements = (UB - LB) + 1; section bounds are inclusive.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while
  // emitting the variably modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably modified private type of item \p N using a
/// previously computed element count \p Size (e.g. in a different function).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N, choosing
/// between aggregate initialization, the declare-reduction initializer, and
/// the private variable's default initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their AST types
  // before emitting any initialization code.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array: per-element initialization handled by the aggregate helper.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // the default-init callback did not handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Returns true if the private copy of reduction item \p N requires a
/// destructor call on cleanup.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of reduction item \p N,
/// if its type needs destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference through pointer/reference levels of \p BaseTy until the
/// element type \p ElTy is reached, then return an lvalue of element type
/// \p ElTy at the resulting address.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Inverse of loadToBegin: rebuild a chain of temporaries so that \p Addr
/// (pointing at the element type \p ElTy) can be accessed through a value of
/// type \p BaseTy. One memory temporary is created per pointer/reference
/// level; each stores the address of the next level.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary to return.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
      Address(Addr, BaseLVAlignment);
}

/// Strip array sections/subscripts off \p Ref to find the underlying base
/// variable. On success \p DE is set to the base DeclRefExpr and its VarDecl
/// is returned; otherwise returns nullptr (and DE is left unset).
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust the private address of an array-section/subscript reduction item
/// so that it is offset from the base variable the same way the shared copy
/// is, and wrap it so it is addressable through the base variable's type.
/// Also records the item's base declaration in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset of the section start within the base variable; apply the same
    // offset to the private copy.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Not an array section/subscript: the clause reference itself is the base.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Returns true if item \p N is initialized via a declare-reduction
/// initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

// The thread-id variable in outlined parallel regions is a kmp_int32 *
// parameter; load through it to get the id lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope enforces the single-exit rule above: any exception
  // escaping the structured block terminates the program.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

// In task regions the thread id is passed by value, so no pointer load is
// needed (contrast with CGOpenMPRegionInfo::getThreadIDVariableLValue).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create a public, unnamed, non-bitfield field of type \p FieldTy and add
/// it to the record/context \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 int32 in the OpenMP runtime.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase globals that are unused declarations (never defined here).
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime entity name: the first part is preceded by
/// FirstSeparator, subsequent parts by Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: maps the omp_in/omp_out (or orig/priv) variables to the two
/// pointer parameters and emits \p CombinerInitializer in that scope.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ?
                    "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, default-initialize the priv copy first if it has a
  // non-trivial initializer of its own.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once per declaration) the combiner and optional initializer
/// functions for the declare-reduction \p D and cache them in UDRMap. When
/// \p CGF is given, the declaration is also tracked per-function so the
/// cache can be invalidated in functionFinished().
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        // Only call-style initializers have an expression to emit.
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the cached (combiner, initializer) pair for \p D, emitting the
/// functions on first use.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Generate the outlined function for a 'parallel' or 'teams' region:
/// determines whether the directive (or one of its recognized combined
/// forms) may be cancelled, then outlines the captured statement.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // hasCancel() is declared per concrete directive class, so each combined
  // parallel form has to be checked explicitly.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const
    RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each
  // part so the runtime can reschedule it.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel() is declared per concrete directive class; check each
  // task-like form explicitly.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Append the constants in \p Data to \p Fields, one per field of \p RD,
/// inserting null padding constants for LLVM struct elements that do not
/// correspond to any AST field.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder appears to be unused in this function -
  // candidate for removal.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized from \p Data;
/// extra arguments are forwarded to finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and append it
/// to the enclosing aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Install a placeholder instruction (a no-op bitcast named "svcpt") that
/// marks where runtime service calls such as __kmpc_global_thread_num are
/// inserted for the current function - either at the current insert point or
/// right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the placeholder installed by setLocThreadIdInsertPt, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format \p Loc as an ident_t source-location string:
/// ";file;function;line;column;;".
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Build (or reuse) the ident_t location descriptor passed to OpenMP
/// runtime calls for source location \p Loc with the given ident flags.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    // Without debug info (or a valid location) use the shared default string.
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
        OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                        Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Return the OpenMP global thread id for the current function, caching it
/// per function and reusing an outlined region's thread-id parameter when
/// it is safe to do so.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when the load cannot land in a block that
      // is unreachable from the parameter's defining block (e.g. across a
      // landing pad); otherwise fall through to the runtime call below.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function caches (thread id, UDRs, user-defined mappers,
/// lastprivate-conditional and untied-task bookkeeping) when codegen for
/// \p CGF's function is done.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Lazily build and return the type of the kmpc_micro outlined-function
/// pointer.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Declare the __kmpc_for_static_init_{4,4u,8,8u} runtime entry matching
/// the loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// loop iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry without #line directives in case the presumed file is virtual.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the reference pointer generated for a declare
/// target 'link' variable (or a 'to' variable under unified shared memory),
/// creating and registering the pointer global on first use. Returns an
/// invalid address in OpenMP-simd-only mode or for other variables.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables by their file's unique id.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable itself; on
      // the device it is resolved by the offload runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1724 std::string Suffix = getName({"cache", ""}); 1725 return getOrCreateInternalVariable( 1726 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1727 } 1728 1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1730 const VarDecl *VD, 1731 Address VDAddr, 1732 SourceLocation Loc) { 1733 if (CGM.getLangOpts().OpenMPUseTLS && 1734 CGM.getContext().getTargetInfo().isTLSSupported()) 1735 return VDAddr; 1736 1737 llvm::Type *VarTy = VDAddr.getElementType(); 1738 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1739 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1740 CGM.Int8PtrTy), 1741 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1742 getOrCreateThreadPrivateCache(VD)}; 1743 return Address(CGF.EmitRuntimeCall( 1744 OMPBuilder.getOrCreateRuntimeFunction( 1745 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1746 Args), 1747 VDAddr.getAlignment()); 1748 } 1749 1750 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1751 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1752 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1753 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1754 // library. 1755 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1756 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1757 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1758 OMPLoc); 1759 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1760 // to register constructor/destructor for variable. 
1761 llvm::Value *Args[] = { 1762 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1763 Ctor, CopyCtor, Dtor}; 1764 CGF.EmitRuntimeCall( 1765 OMPBuilder.getOrCreateRuntimeFunction( 1766 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1767 Args); 1768 } 1769 1770 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1771 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1772 bool PerformInit, CodeGenFunction *CGF) { 1773 if (CGM.getLangOpts().OpenMPUseTLS && 1774 CGM.getContext().getTargetInfo().isTLSSupported()) 1775 return nullptr; 1776 1777 VD = VD->getDefinition(CGM.getContext()); 1778 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1779 QualType ASTTy = VD->getType(); 1780 1781 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1782 const Expr *Init = VD->getAnyInitializer(); 1783 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1784 // Generate function that re-emits the declaration's initializer into the 1785 // threadprivate copy of the variable VD 1786 CodeGenFunction CtorCGF(CGM); 1787 FunctionArgList Args; 1788 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1789 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1790 ImplicitParamDecl::Other); 1791 Args.push_back(&Dst); 1792 1793 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1794 CGM.getContext().VoidPtrTy, Args); 1795 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1796 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1797 llvm::Function *Fn = 1798 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1799 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1800 Args, Loc, Loc); 1801 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1802 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1803 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1804 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1805 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1806 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1807 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1808 /*IsInitializer=*/true); 1809 ArgVal = CtorCGF.EmitLoadOfScalar( 1810 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1811 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1812 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1813 CtorCGF.FinishFunction(); 1814 Ctor = Fn; 1815 } 1816 if (VD->getType().isDestructedType() != QualType::DK_none) { 1817 // Generate function that emits destructor call for the threadprivate copy 1818 // of the variable VD 1819 CodeGenFunction DtorCGF(CGM); 1820 FunctionArgList Args; 1821 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1822 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1823 ImplicitParamDecl::Other); 1824 Args.push_back(&Dst); 1825 1826 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1827 CGM.getContext().VoidTy, Args); 1828 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1829 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1830 llvm::Function *Fn = 1831 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1832 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1833 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1834 Loc, Loc); 1835 // Create a scope with an artificial location for the body of this function. 1836 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1837 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1838 DtorCGF.GetAddrOfLocalVar(&Dst), 1839 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1840 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1841 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1842 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1843 DtorCGF.FinishFunction(); 1844 Dtor = Fn; 1845 } 1846 // Do not emit init function if it is not required. 
1847 if (!Ctor && !Dtor) 1848 return nullptr; 1849 1850 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1851 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1852 /*isVarArg=*/false) 1853 ->getPointerTo(); 1854 // Copying constructor for the threadprivate variable. 1855 // Must be NULL - reserved by runtime, but currently it requires that this 1856 // parameter is always NULL. Otherwise it fires assertion. 1857 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1858 if (Ctor == nullptr) { 1859 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1860 /*isVarArg=*/false) 1861 ->getPointerTo(); 1862 Ctor = llvm::Constant::getNullValue(CtorTy); 1863 } 1864 if (Dtor == nullptr) { 1865 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1866 /*isVarArg=*/false) 1867 ->getPointerTo(); 1868 Dtor = llvm::Constant::getNullValue(DtorTy); 1869 } 1870 if (!CGF) { 1871 auto *InitFunctionTy = 1872 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1873 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1874 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1875 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1876 CodeGenFunction InitCGF(CGM); 1877 FunctionArgList ArgList; 1878 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1879 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1880 Loc, Loc); 1881 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1882 InitCGF.FinishFunction(); 1883 return InitFunction; 1884 } 1885 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1886 } 1887 return nullptr; 1888 } 1889 1890 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1891 llvm::GlobalVariable *Addr, 1892 bool PerformInit) { 1893 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1894 !CGM.getLangOpts().OpenMPIsDevice) 1895 return false; 1896 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1897 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1898 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1899 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1900 HasRequiresUnifiedSharedMemory)) 1901 return CGM.getLangOpts().OpenMPIsDevice; 1902 VD = VD->getDefinition(CGM.getContext()); 1903 assert(VD && "Unknown VarDecl"); 1904 1905 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1906 return CGM.getLangOpts().OpenMPIsDevice; 1907 1908 QualType ASTTy = VD->getType(); 1909 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1910 1911 // Produce the unique prefix to identify the new target regions. We use 1912 // the source location of the variable declaration which we know to not 1913 // conflict with any target region. 1914 unsigned DeviceID; 1915 unsigned FileID; 1916 unsigned Line; 1917 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1918 SmallString<128> Buffer, Out; 1919 { 1920 llvm::raw_svector_ostream OS(Buffer); 1921 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1922 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1923 } 1924 1925 const Expr *Init = VD->getAnyInitializer(); 1926 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1927 llvm::Constant *Ctor; 1928 llvm::Constant *ID; 1929 if (CGM.getLangOpts().OpenMPIsDevice) { 1930 // Generate function that re-emits the declaration's initializer into 1931 // the threadprivate copy of the variable VD 1932 CodeGenFunction CtorCGF(CGM); 1933 1934 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1935 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1936 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1937 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1938 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1939 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1940 FunctionArgList(), Loc, Loc); 1941 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1942 CtorCGF.EmitAnyExprToMem(Init, 1943 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1944 Init->getType().getQualifiers(), 1945 /*IsInitializer=*/true); 1946 CtorCGF.FinishFunction(); 1947 Ctor = Fn; 1948 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1949 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1950 } else { 1951 Ctor = new llvm::GlobalVariable( 1952 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1953 llvm::GlobalValue::PrivateLinkage, 1954 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1955 ID = Ctor; 1956 } 1957 1958 // Register the information for the entry associated with the constructor. 1959 Out.clear(); 1960 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1961 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1962 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1963 } 1964 if (VD->getType().isDestructedType() != QualType::DK_none) { 1965 llvm::Constant *Dtor; 1966 llvm::Constant *ID; 1967 if (CGM.getLangOpts().OpenMPIsDevice) { 1968 // Generate function that emits destructor call for the threadprivate 1969 // copy of the variable VD 1970 CodeGenFunction DtorCGF(CGM); 1971 1972 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1973 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1974 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1975 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1976 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1977 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1978 FunctionArgList(), Loc, Loc); 1979 // Create a scope with an artificial location for the body of this 1980 // function. 
1981 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1982 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1983 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1984 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1985 DtorCGF.FinishFunction(); 1986 Dtor = Fn; 1987 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1988 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1989 } else { 1990 Dtor = new llvm::GlobalVariable( 1991 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1992 llvm::GlobalValue::PrivateLinkage, 1993 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1994 ID = Dtor; 1995 } 1996 // Register the information for the entry associated with the destructor. 1997 Out.clear(); 1998 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1999 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2000 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2001 } 2002 return CGM.getLangOpts().OpenMPIsDevice; 2003 } 2004 2005 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2006 QualType VarType, 2007 StringRef Name) { 2008 std::string Suffix = getName({"artificial", ""}); 2009 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2010 llvm::Value *GAddr = 2011 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2012 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2013 CGM.getTarget().isTLSSupported()) { 2014 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2015 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2016 } 2017 std::string CacheSuffix = getName({"cache", ""}); 2018 llvm::Value *Args[] = { 2019 emitUpdateLocation(CGF, SourceLocation()), 2020 getThreadID(CGF, SourceLocation()), 2021 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2022 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2023 /*isSigned=*/false), 2024 
getOrCreateInternalVariable( 2025 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2026 return Address( 2027 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2028 CGF.EmitRuntimeCall( 2029 OMPBuilder.getOrCreateRuntimeFunction( 2030 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2031 Args), 2032 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2033 CGM.getContext().getTypeAlignInChars(VarType)); 2034 } 2035 2036 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2037 const RegionCodeGenTy &ThenGen, 2038 const RegionCodeGenTy &ElseGen) { 2039 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2040 2041 // If the condition constant folds and can be elided, try to avoid emitting 2042 // the condition and the dead arm of the if/else. 2043 bool CondConstant; 2044 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2045 if (CondConstant) 2046 ThenGen(CGF); 2047 else 2048 ElseGen(CGF); 2049 return; 2050 } 2051 2052 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2053 // emit the conditional branch. 2054 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2055 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2056 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2057 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2058 2059 // Emit the 'then' code. 2060 CGF.EmitBlock(ThenBlock); 2061 ThenGen(CGF); 2062 CGF.EmitBranch(ContBlock); 2063 // Emit the 'else' code if present. 2064 // There is no need to emit line number for unconditional branch. 2065 (void)ApplyDebugLocation::CreateEmpty(CGF); 2066 CGF.EmitBlock(ElseBlock); 2067 ElseGen(CGF); 2068 // There is no need to emit line number for unconditional branch. 2069 (void)ApplyDebugLocation::CreateEmpty(CGF); 2070 CGF.EmitBranch(ContBlock); 2071 // Emit the continuation block for code after the if. 
2072 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2073 } 2074 2075 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2076 llvm::Function *OutlinedFn, 2077 ArrayRef<llvm::Value *> CapturedVars, 2078 const Expr *IfCond) { 2079 if (!CGF.HaveInsertPoint()) 2080 return; 2081 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2082 auto &M = CGM.getModule(); 2083 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2084 this](CodeGenFunction &CGF, PrePostActionTy &) { 2085 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2086 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2087 llvm::Value *Args[] = { 2088 RTLoc, 2089 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2090 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2091 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2092 RealArgs.append(std::begin(Args), std::end(Args)); 2093 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2094 2095 llvm::FunctionCallee RTLFn = 2096 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2097 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2098 }; 2099 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2100 this](CodeGenFunction &CGF, PrePostActionTy &) { 2101 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2102 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2103 // Build calls: 2104 // __kmpc_serialized_parallel(&Loc, GTid); 2105 llvm::Value *Args[] = {RTLoc, ThreadID}; 2106 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2107 M, OMPRTL___kmpc_serialized_parallel), 2108 Args); 2109 2110 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2111 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2112 Address ZeroAddrBound = 2113 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2114 /*Name=*/".bound.zero.addr"); 2115 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2116 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2117 // ThreadId 
for serialized parallels is 0. 2118 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2119 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2120 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2121 2122 // Ensure we do not inline the function. This is trivially true for the ones 2123 // passed to __kmpc_fork_call but the ones calles in serialized regions 2124 // could be inlined. This is not a perfect but it is closer to the invariant 2125 // we want, namely, every data environment starts with a new function. 2126 // TODO: We should pass the if condition to the runtime function and do the 2127 // handling there. Much cleaner code. 2128 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2129 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2130 2131 // __kmpc_end_serialized_parallel(&Loc, GTid); 2132 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2133 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2134 M, OMPRTL___kmpc_end_serialized_parallel), 2135 EndArgs); 2136 }; 2137 if (IfCond) { 2138 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2139 } else { 2140 RegionCodeGenTy ThenRCG(ThenGen); 2141 ThenRCG(CGF); 2142 } 2143 } 2144 2145 // If we're inside an (outlined) parallel region, use the region info's 2146 // thread-ID variable (it is passed in a first argument of the outlined function 2147 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2148 // regular serial code region, get thread ID by calling kmp_int32 2149 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2150 // return the address of that temp. 
2151 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2152 SourceLocation Loc) { 2153 if (auto *OMPRegionInfo = 2154 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2155 if (OMPRegionInfo->getThreadIDVariable()) 2156 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2157 2158 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2159 QualType Int32Ty = 2160 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2161 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2162 CGF.EmitStoreOfScalar(ThreadID, 2163 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2164 2165 return ThreadIDTemp; 2166 } 2167 2168 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2169 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2170 SmallString<256> Buffer; 2171 llvm::raw_svector_ostream Out(Buffer); 2172 Out << Name; 2173 StringRef RuntimeName = Out.str(); 2174 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2175 if (Elem.second) { 2176 assert(Elem.second->getType()->getPointerElementType() == Ty && 2177 "OMP internal variable has different type than requested"); 2178 return &*Elem.second; 2179 } 2180 2181 return Elem.second = new llvm::GlobalVariable( 2182 CGM.getModule(), Ty, /*IsConstant*/ false, 2183 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2184 Elem.first(), /*InsertBefore=*/nullptr, 2185 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2186 } 2187 2188 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2189 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2190 std::string Name = getName({Prefix, "var"}); 2191 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2192 } 2193 2194 namespace { 2195 /// Common pre(post)-action for different OpenMP constructs. 
/// Pairs an enter and an exit runtime call around a region; when Conditional,
/// the region body runs only if the enter call returns nonzero (the caller
/// must invoke Done() to close the conditional).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region guarded by the named lock, optionally with a hint.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: body runs only on the thread for which
/// __kmpc_master returns nonzero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'masked' region; absent a filter clause, thread 0 is assumed.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield' call (via OMPIRBuilder when enabled).
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by (end_)taskgroup runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the helper "void copy_func(void *LHSArg, void *RHSArg)" that the
/// __kmpc_copyprivate call uses to copy each copyprivate variable from the
/// single thread's copy to the other threads' copies.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, including the did_it flag and __kmpc_copyprivate
/// broadcast when copyprivate clauses are present.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs here land in the callee's
    // DestExprs/SrcExprs parameters respectively; this mirrors the naming
    // convention of the caller's expression lists — confirm with
    // CGStmtOpenMP before reordering.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region. (Continues past this chunk.)
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
                              OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag recorded in the source
/// location info of the barrier call (implicit per-construct barriers vs. an
/// explicit 'barrier' directive).
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Pick the default schedule and chunk for a loop directive; for doacross
/// loops (an 'ordered' clause with a loop count) force schedule(static, 1).
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier: via the OpenMPIRBuilder when it is enabled, otherwise as a
/// call to __kmpc_barrier, or to __kmpc_cancel_barrier inside a cancellable
/// region (optionally branching to the cancellation exit on a nonzero result).
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ?
             OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the schedule clause lowers to the plain (non-chunked,
/// non-ordered) static runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the dist_schedule clause lowers to the non-chunked static
/// distribute schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the schedule clause lowers to the chunked static runtime
/// schedule.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the dist_schedule clause lowers to the chunked static
/// distribute schedule.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if the schedule requires the dynamic-dispatch codegen path,
/// i.e. anything other than plain static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
/// schedule value. If both M1 and M2 set a monotonicity modifier, the second
/// switch overwrites the first; the 'simd' modifier instead upgrades a
/// chunked static schedule to the balanced-chunked variant.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  // The modifier bits are OR'ed into the schedule enumeration value.
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static non-ordered schedules use the static-init path, never dispatch.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the actual __kmpc_for_static_init_* call; used by
/// both emitForStaticInit (worksharing) and emitDistributeStaticInit
/// (distribute).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init runtime call for a worksharing (loop- or
/// sections-based) directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init runtime call for a 'distribute' directive; distribute
/// has no schedule modifiers, so 'unknown' is passed for both.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the __kmpc_for_static_fini call that closes a statically scheduled
/// loop, sections, or distribute region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ?
                                   OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

/// Notify the runtime that one iteration of an ordered dynamically scheduled
/// loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Request the next chunk of a dynamically scheduled loop from the runtime.
/// The runtime's i32 result is converted to a bool: false once there is no
/// more work.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit a __kmpc_push_num_threads call for a 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit a __kmpc_push_proc_bind call for a 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a flush: via the OpenMPIRBuilder when enabled, otherwise as a
/// __kmpc_flush call. The variable list parameter is accepted but unused here.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if no target region or device global variable entries have
/// been recorded for offloading.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register a target region entry: on the device side fill in the
/// address/ID/flags of an entry previously initialized from the host
/// metadata; on the host side create a brand new entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if a target region entry with the given coordinates exists
/// and, unless \p IgnoreAddressId is set, has not yet had its address or ID
/// registered.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

/// Apply \p Action to every registered target region entry, walking the
/// device/file/parent-function/line nesting of the map.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry (device-side only), recording
/// its emission order and flags.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register a device global variable entry, mirroring
/// registerTargetRegionEntryInfo for 'declare target' variables.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the hasDeviceGlobalVarEntryInfo() re-check below looks
    // redundant with the early return above — kept as-is to preserve
    // behavior.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Address already registered; only fill in a still-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Apply \p Action to every registered device global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create one __tgt_offload_entry descriptor (ID, name, size, flags) and
/// place it in the 'omp_offloading_entries' section where the linker gathers
/// all entries into a table.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
3119 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3120 3121 std::string StringName = getName({"omp_offloading", "entry_name"}); 3122 auto *Str = new llvm::GlobalVariable( 3123 M, StrPtrInit->getType(), /*isConstant=*/true, 3124 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3125 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3126 3127 llvm::Constant *Data[] = { 3128 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3129 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3130 llvm::ConstantInt::get(CGM.SizeTy, Size), 3131 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3132 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3133 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3134 llvm::GlobalVariable *Entry = createGlobalStruct( 3135 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3136 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3137 3138 // The entry has to be created in the section the linker expects it to be. 3139 Entry->setSection("omp_offloading_entries"); 3140 } 3141 3142 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3143 // Emit the offloading entries and metadata so that the device codegen side 3144 // can easily figure out what to emit. The produced metadata looks like 3145 // this: 3146 // 3147 // !omp_offload.info = !{!1, ...} 3148 // 3149 // Right now we only generate metadata for function that contain target 3150 // regions. 3151 3152 // If we are in simd mode or there are no entries, we don't need to do 3153 // anything. 
  // Nothing to do in simd-only mode or when no offload entries were collected.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the entry's creation-order number
  // (E.getOrder()), so they are pre-sized to the total entry count.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // device/file unique IDs against the files known to the
        // SourceManager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit one offload entry per collected record, diagnosing entries whose
  // address/ID was never filled in during codegen.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device side the entry is not
        // needed.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // On the device only a declaration is expected; on the host the
        // address must be present.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata node
  // is read from the resulting module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands emitted by
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Builds (once) and caches the kmp_routine_entry_t type:
/// typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void     *addr;     // Pointer to the offload entry info.
  //                       // (function or global)
  //   char     *name;     // Name of the function or global.
  //   size_t    size;     // Size of the entry info (0 if it is a function).
  //   int32_t   flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t   reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The runtime expects a packed layout for this record.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundles the declarations describing one privatized variable of a
/// task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // Only the single-argument constructor leaves all three of these null, so
  // this identifies entries created for local (untied) privates.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \a VD carries an OMPAllocateDeclAttr that names anything
/// other than the default/null allocator without an allocator expression,
/// i.e. the variable needs dedicated allocation handling.
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the implicit record type that holds the task's private copies, one
/// field per privatized variable. Returns null when there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes from the original variable to the
        // field so the copy keeps the required alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the kmp_task_t record type; taskloop directives get five extra
/// fields (lb, ub, st, liter, reductions).
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Builds the record that pairs the runtime's kmp_task_t header with the
/// task's privates record (when it has privates).
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t       task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t header.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The second field, if present, is the privates record; otherwise pass a
  // null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Load the additional taskloop fields (lb, ub, st, liter, reductions)
    // and forward them to the outlined function.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit a function that runs the destructors for each destructible field of
/// the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a destructor cleanup only for fields whose type actually needs
    // destruction.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
/// *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its 1-based position in Args; the order
  // below (private, firstprivate, lastprivate, local) fixes the parameter
  // order of the generated function.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // Mirror the type adjustments made in createPrivatesRecordDecl for local
    // privates with reference type or a dedicated allocator.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This tiny mapping function is always worth inlining when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Look up the out-parameter that corresponds to this privates field and
    // store the field's address through it.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup) only non-trivial constructor initializers
    // need to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds of the source task.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize the init element so the
          // initializer expression reads from the shared copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if task initialization (and hence a duplication function) is
/// required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates are never initialized here (see emitPrivatesInit).
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only non-trivial constructor initializers force an init.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: (task_dst, task_src, lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate values are copied from the *source* task's shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Null iterator expression means the scope is a no-op (see ctor/dtor).
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination per iterator; indexed in parallel with
  // the iterators of E. Filled by the ctor, consumed by the dtor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // Emits the loop *headers* for every iterator in \p E; the code emitted
  // between construction and destruction of this scope becomes the innermost
  // loop body. The destructor emits the matching loop latches/exits.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound once, before the loop nest is entered.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed/unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first (reverse of construction order).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns a (pointer, size-in-bytes) pair for the storage referenced by
/// expression \p E: for array-shaping expressions the size is the product of
/// the dimensions times the element size; for array sections it is the byte
/// distance from the section's lower bound to one past its upper bound;
/// otherwise it is the size of the expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress =
        UpAddrLVal.getAddress(CGF);
    // One past the section's upper element, so Up - Low is the byte size.
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// Layout matches the runtime: { intptr_t base_addr; size_t len; u32 flags; }.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits everything needed to launch a task for directive \p D: the
/// task descriptor types, the proxy task entry, the __kmpc_omp_task_alloc
/// call (or the target variant for nowait target tasks), shareds copy,
/// private-copy initialization, and destructor/priority/affinity/detach
/// bookkeeping. Returns the allocated task and related handles.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps declaration order within each alignment class.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The 4th parameter of the task outlined function is the privates map.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime condition (pointer set) or a
  // compile-time constant (int flag of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the element count is only known at runtime
        // (product of the iterator upper bounds).
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA to hold the affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized count: a plain constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-driven entries need a runtime index that continues after
      // the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop tasks may be duplicated by the runtime; emit a task_dup
    // function when lastprivates or non-trivial firstprivate init exist.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // These kinds never reach the runtime translation; they are handled (or
  // rejected) earlier in codegen/sema.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// Layout matches the runtime: { intptr_t base_addr; size_t len; flags }.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns the number of dependence elements stored in the depobj referenced
/// by \p DepobjLVal, together with an lvalue for the first element. The count
/// is read from the base_addr field of the record at index -1, i.e. one slot
/// before the element array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* to the dependence array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // GEP to index -1: the header record that precedes the element array.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills \p DependenciesArray with kmp_depend_info records for every
/// dependence expression in \p Data, starting at position \p Pos (either a
/// compile-time counter or a runtime lvalue counter, advanced per element).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the whole fill loop in the iterator's loop nest, if present.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ?
                   Data.IteratorExpr->IgnoreParenImpCasts()
                                    : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index stored in a counter lvalue (iterator case).
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (in-memory for the iterator case).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For every depobj expression in \p Data, emits code that reads the number
/// of dependence elements stored in that depobj and returns the resulting
/// runtime values (one per expression), accumulated through stack temporaries
/// so the loads survive the iterator loop nest.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the header record at index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary; inside an iterator
      // loop this sums the counts across all iterations.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated sizes after the iterator scope has been closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the dependence records stored in each depobj of \p Data into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
4808 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4809 Addr.getElementType(), Addr.getPointer(), 4810 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4811 LValue NumDepsBase = CGF.MakeAddrLValue( 4812 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4813 Base.getBaseInfo(), Base.getTBAAInfo()); 4814 // NumDeps = deps[i].base_addr; 4815 LValue BaseAddrLVal = CGF.EmitLValueForField( 4816 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4817 llvm::Value *NumDeps = 4818 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4819 4820 // memcopy dependency data. 4821 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4822 ElSize, 4823 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4824 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4825 Address DepAddr = 4826 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), 4827 DependenciesArray.getPointer(), Pos), 4828 DependenciesArray.getAlignment()); 4829 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4830 4831 // Increase pos. 4832 // pos += size; 4833 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4834 CGF.EmitStoreOfScalar(Add, PosLVal); 4835 } 4836 } 4837 } 4838 4839 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4840 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4841 SourceLocation Loc) { 4842 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4843 return D.DepExprs.empty(); 4844 })) 4845 return std::make_pair(nullptr, Address::invalid()); 4846 // Process list of dependencies. 4847 ASTContext &C = CGM.getContext(); 4848 Address DependenciesArray = Address::invalid(); 4849 llvm::Value *NumOfElements = nullptr; 4850 unsigned NumDependencies = std::accumulate( 4851 Dependencies.begin(), Dependencies.end(), 0, 4852 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4853 return D.DepKind == OMPC_DEPEND_depobj 4854 ? 
V 4855 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 4856 }); 4857 QualType FlagsTy; 4858 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4859 bool HasDepobjDeps = false; 4860 bool HasRegularWithIterators = false; 4861 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4862 llvm::Value *NumOfRegularWithIterators = 4863 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4864 // Calculate number of depobj dependecies and regular deps with the iterators. 4865 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4866 if (D.DepKind == OMPC_DEPEND_depobj) { 4867 SmallVector<llvm::Value *, 4> Sizes = 4868 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4869 for (llvm::Value *Size : Sizes) { 4870 NumOfDepobjElements = 4871 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4872 } 4873 HasDepobjDeps = true; 4874 continue; 4875 } 4876 // Include number of iterations, if any. 4877 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4878 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4879 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4880 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4881 NumOfRegularWithIterators = 4882 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4883 } 4884 HasRegularWithIterators = true; 4885 continue; 4886 } 4887 } 4888 4889 QualType KmpDependInfoArrayTy; 4890 if (HasDepobjDeps || HasRegularWithIterators) { 4891 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4892 /*isSigned=*/false); 4893 if (HasDepobjDeps) { 4894 NumOfElements = 4895 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4896 } 4897 if (HasRegularWithIterators) { 4898 NumOfElements = 4899 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4900 } 4901 OpaqueValueExpr OVE(Loc, 4902 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4903 VK_PRValue); 4904 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4905 
RValue::get(NumOfElements)); 4906 KmpDependInfoArrayTy = 4907 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4908 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4909 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4910 // Properly emit variable-sized array. 4911 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4912 ImplicitParamDecl::Other); 4913 CGF.EmitVarDecl(*PD); 4914 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4915 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4916 /*isSigned=*/false); 4917 } else { 4918 KmpDependInfoArrayTy = C.getConstantArrayType( 4919 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4920 ArrayType::Normal, /*IndexTypeQuals=*/0); 4921 DependenciesArray = 4922 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4923 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4924 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4925 /*isSigned=*/false); 4926 } 4927 unsigned Pos = 0; 4928 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4929 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4930 Dependencies[I].IteratorExpr) 4931 continue; 4932 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4933 DependenciesArray); 4934 } 4935 // Copy regular dependecies with iterators. 4936 LValue PosLVal = CGF.MakeAddrLValue( 4937 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4938 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4939 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4940 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4941 !Dependencies[I].IteratorExpr) 4942 continue; 4943 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4944 DependenciesArray); 4945 } 4946 // Copy final depobj arrays without iterators. 
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  // Callers receive the array as an opaque pointer.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits allocation and initialization of the dependency array backing an
/// 'omp depobj' object. The array is allocated dynamically via __kmpc_alloc
/// with one extra leading element that stores the number of dependencies
/// (needed by depobj(x) update(...)/destroy). Returns the address of the
/// first real dependency record, i.e. one element past the counter.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator-modified clause: the count is the runtime product of all
    // iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading counter element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size of kmp_depend_info[NumDependencies + 1] (extra slot
    // is the counter element).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
5008 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5009 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5010 5011 llvm::Value *Addr = 5012 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5013 CGM.getModule(), OMPRTL___kmpc_alloc), 5014 Args, ".dep.arr.addr"); 5015 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5016 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5017 DependenciesArray = Address(Addr, Align); 5018 // Write number of elements in the first element of array for depobj. 5019 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5020 // deps[i].base_addr = NumDependencies; 5021 LValue BaseAddrLVal = CGF.EmitLValueForField( 5022 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5023 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5024 llvm::PointerUnion<unsigned *, LValue *> Pos; 5025 unsigned Idx = 1; 5026 LValue PosLVal; 5027 if (Dependencies.IteratorExpr) { 5028 PosLVal = CGF.MakeAddrLValue( 5029 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5030 C.getSizeType()); 5031 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5032 /*IsInit=*/true); 5033 Pos = &PosLVal; 5034 } else { 5035 Pos = &Idx; 5036 } 5037 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5038 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5039 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5040 return DependenciesArray; 5041 } 5042 5043 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5044 SourceLocation Loc) { 5045 ASTContext &C = CGM.getContext(); 5046 QualType FlagsTy; 5047 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5048 LValue Base = CGF.EmitLoadOfPointerLValue( 5049 DepobjLVal.getAddress(CGF), 5050 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5051 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5052 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5053 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5054 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5055 Addr.getElementType(), Addr.getPointer(), 5056 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5057 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5058 CGF.VoidPtrTy); 5059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5060 // Use default allocator. 5061 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5062 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5063 5064 // _kmpc_free(gtid, addr, nullptr); 5065 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5066 CGM.getModule(), OMPRTL___kmpc_free), 5067 Args); 5068 } 5069 5070 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5071 OpenMPDependClauseKind NewDepKind, 5072 SourceLocation Loc) { 5073 ASTContext &C = CGM.getContext(); 5074 QualType FlagsTy; 5075 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5076 RecordDecl *KmpDependInfoRD = 5077 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5078 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5079 llvm::Value *NumDeps; 5080 LValue Base; 5081 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5082 5083 Address Begin = Base.getAddress(CGF); 5084 // Cast from pointer to array type to pointer to single element. 5085 llvm::Value *End = CGF.Builder.CreateGEP( 5086 Begin.getElementType(), Begin.getPointer(), NumDeps); 5087 // The basic structure here is a while-do loop. 
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; the second incoming edge is added
  // below once the loop latch block is known.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits code for an explicit task: allocates/initializes the task object,
/// materializes its dependencies, and either enqueues the task
/// (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause, executes it
/// immediately between __kmpc_omp_task_begin_if0/_complete_if0 (waiting on
/// the dependencies first).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch of the 'if' clause (or unconditional path): enqueue the
  // task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch: the 'if' clause evaluated false, so the task body runs
  // immediately on the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits a call to the __kmpc_taskloop runtime entry for a taskloop
/// directive, initializing the task object and its bounds/stride/reduction
/// fields beforehand.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task object from the loop
  // directive's bound/stride helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Two PHIs walk the source and destination arrays in lockstep; the latch
  // edges are added after the loop body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap the LHS/RHS variables to the current element so the
  // scalar reduction generator operates on one element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Detect the UDR pattern: a call whose callee is an opaque value bound to a
  // declare-reduction declaration; bind the combiner function and emit the
  // call through the opaque-value mapping.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined function passed to __kmpc_reduce that combines, for
/// every reduction variable, the RHS (private) copy into the LHS copy. Both
/// arguments are arrays of void* pointers to the per-variable copies; for
/// variably-modified types an extra array slot carries the array size.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to its slot in the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA sizes occupy an extra slot in the pointer arrays.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction variable: element-wise for
/// array types, otherwise a single combiner expression.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
5524 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5525 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5526 EmitOMPAggregateReduction( 5527 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5528 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5529 emitReductionCombiner(CGF, ReductionOp); 5530 }); 5531 } else { 5532 // Emit reduction for array subscript or single variable. 5533 emitReductionCombiner(CGF, ReductionOp); 5534 } 5535 } 5536 5537 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5538 ArrayRef<const Expr *> Privates, 5539 ArrayRef<const Expr *> LHSExprs, 5540 ArrayRef<const Expr *> RHSExprs, 5541 ArrayRef<const Expr *> ReductionOps, 5542 ReductionOptionsTy Options) { 5543 if (!CGF.HaveInsertPoint()) 5544 return; 5545 5546 bool WithNowait = Options.WithNowait; 5547 bool SimpleReduction = Options.SimpleReduction; 5548 5549 // Next code should be emitted for reduction: 5550 // 5551 // static kmp_critical_name lock = { 0 }; 5552 // 5553 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5554 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5555 // ... 5556 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5557 // *(Type<n>-1*)rhs[<n>-1]); 5558 // } 5559 // 5560 // ... 5561 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5562 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5563 // RedList, reduce_func, &<lock>)) { 5564 // case 1: 5565 // ... 5566 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5567 // ... 5568 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5569 // break; 5570 // case 2: 5571 // ... 5572 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5573 // ... 5574 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5575 // break; 5576 // default:; 5577 // } 5578 // 5579 // if SimpleReduction is true, only the next code is generated: 5580 // ... 
5581 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5582 // ... 5583 5584 ASTContext &C = CGM.getContext(); 5585 5586 if (SimpleReduction) { 5587 CodeGenFunction::RunCleanupsScope Scope(CGF); 5588 auto IPriv = Privates.begin(); 5589 auto ILHS = LHSExprs.begin(); 5590 auto IRHS = RHSExprs.begin(); 5591 for (const Expr *E : ReductionOps) { 5592 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5593 cast<DeclRefExpr>(*IRHS)); 5594 ++IPriv; 5595 ++ILHS; 5596 ++IRHS; 5597 } 5598 return; 5599 } 5600 5601 // 1. Build a list of reduction variables. 5602 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5603 auto Size = RHSExprs.size(); 5604 for (const Expr *E : Privates) { 5605 if (E->getType()->isVariablyModifiedType()) 5606 // Reserve place for array size. 5607 ++Size; 5608 } 5609 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5610 QualType ReductionArrayTy = 5611 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5612 /*IndexTypeQuals=*/0); 5613 Address ReductionList = 5614 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5615 auto IPriv = Privates.begin(); 5616 unsigned Idx = 0; 5617 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5618 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5619 CGF.Builder.CreateStore( 5620 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5621 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5622 Elem); 5623 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5624 // Store array size. 5625 ++Idx; 5626 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5627 llvm::Value *Size = CGF.Builder.CreateIntCast( 5628 CGF.getVLASize( 5629 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5630 .NumElts, 5631 CGF.SizeTy, /*isSigned=*/false); 5632 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5633 Elem); 5634 } 5635 } 5636 5637 // 2. 
Emit reduce_func(). 5638 llvm::Function *ReductionFn = emitReductionFunction( 5639 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5640 LHSExprs, RHSExprs, ReductionOps); 5641 5642 // 3. Create static kmp_critical_name lock = { 0 }; 5643 std::string Name = getName({"reduction"}); 5644 llvm::Value *Lock = getCriticalRegionLock(Name); 5645 5646 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5647 // RedList, reduce_func, &<lock>); 5648 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5649 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5650 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5651 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5652 ReductionList.getPointer(), CGF.VoidPtrTy); 5653 llvm::Value *Args[] = { 5654 IdentTLoc, // ident_t *<loc> 5655 ThreadId, // i32 <gtid> 5656 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5657 ReductionArrayTySize, // size_type sizeof(RedList) 5658 RL, // void *RedList 5659 ReductionFn, // void (*) (void *, void *) <reduce_func> 5660 Lock // kmp_critical_name *&<lock> 5661 }; 5662 llvm::Value *Res = CGF.EmitRuntimeCall( 5663 OMPBuilder.getOrCreateRuntimeFunction( 5664 CGM.getModule(), 5665 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5666 Args); 5667 5668 // 5. Build switch(res) 5669 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5670 llvm::SwitchInst *SwInst = 5671 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5672 5673 // 6. Build case 1: 5674 // ... 5675 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5676 // ... 
5677 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5678 // break; 5679 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5680 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5681 CGF.EmitBlock(Case1BB); 5682 5683 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5684 llvm::Value *EndArgs[] = { 5685 IdentTLoc, // ident_t *<loc> 5686 ThreadId, // i32 <gtid> 5687 Lock // kmp_critical_name *&<lock> 5688 }; 5689 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5690 CodeGenFunction &CGF, PrePostActionTy &Action) { 5691 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5692 auto IPriv = Privates.begin(); 5693 auto ILHS = LHSExprs.begin(); 5694 auto IRHS = RHSExprs.begin(); 5695 for (const Expr *E : ReductionOps) { 5696 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5697 cast<DeclRefExpr>(*IRHS)); 5698 ++IPriv; 5699 ++ILHS; 5700 ++IRHS; 5701 } 5702 }; 5703 RegionCodeGenTy RCG(CodeGen); 5704 CommonActionTy Action( 5705 nullptr, llvm::None, 5706 OMPBuilder.getOrCreateRuntimeFunction( 5707 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5708 : OMPRTL___kmpc_end_reduce), 5709 EndArgs); 5710 RCG.setAction(Action); 5711 RCG(CGF); 5712 5713 CGF.EmitBranch(DefaultBB); 5714 5715 // 7. Build case 2: 5716 // ... 5717 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5718 // ... 
5719 // break; 5720 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5721 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5722 CGF.EmitBlock(Case2BB); 5723 5724 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5725 CodeGenFunction &CGF, PrePostActionTy &Action) { 5726 auto ILHS = LHSExprs.begin(); 5727 auto IRHS = RHSExprs.begin(); 5728 auto IPriv = Privates.begin(); 5729 for (const Expr *E : ReductionOps) { 5730 const Expr *XExpr = nullptr; 5731 const Expr *EExpr = nullptr; 5732 const Expr *UpExpr = nullptr; 5733 BinaryOperatorKind BO = BO_Comma; 5734 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5735 if (BO->getOpcode() == BO_Assign) { 5736 XExpr = BO->getLHS(); 5737 UpExpr = BO->getRHS(); 5738 } 5739 } 5740 // Try to emit update expression as a simple atomic. 5741 const Expr *RHSExpr = UpExpr; 5742 if (RHSExpr) { 5743 // Analyze RHS part of the whole expression. 5744 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5745 RHSExpr->IgnoreParenImpCasts())) { 5746 // If this is a conditional operator, analyze its condition for 5747 // min/max reduction operator. 
5748 RHSExpr = ACO->getCond(); 5749 } 5750 if (const auto *BORHS = 5751 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5752 EExpr = BORHS->getRHS(); 5753 BO = BORHS->getOpcode(); 5754 } 5755 } 5756 if (XExpr) { 5757 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5758 auto &&AtomicRedGen = [BO, VD, 5759 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5760 const Expr *EExpr, const Expr *UpExpr) { 5761 LValue X = CGF.EmitLValue(XExpr); 5762 RValue E; 5763 if (EExpr) 5764 E = CGF.EmitAnyExpr(EExpr); 5765 CGF.EmitOMPAtomicSimpleUpdateExpr( 5766 X, E, BO, /*IsXLHSInRHSPart=*/true, 5767 llvm::AtomicOrdering::Monotonic, Loc, 5768 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5769 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5770 PrivateScope.addPrivate( 5771 VD, [&CGF, VD, XRValue, Loc]() { 5772 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5773 CGF.emitOMPSimpleStore( 5774 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5775 VD->getType().getNonReferenceType(), Loc); 5776 return LHSTemp; 5777 }); 5778 (void)PrivateScope.Privatize(); 5779 return CGF.EmitAnyExpr(UpExpr); 5780 }); 5781 }; 5782 if ((*IPriv)->getType()->isArrayType()) { 5783 // Emit atomic reduction for array section. 5784 const auto *RHSVar = 5785 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5786 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5787 AtomicRedGen, XExpr, EExpr, UpExpr); 5788 } else { 5789 // Emit atomic reduction for array subscript or single variable. 5790 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5791 } 5792 } else { 5793 // Emit as a critical region. 
5794 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5795 const Expr *, const Expr *) { 5796 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5797 std::string Name = RT.getName({"atomic_reduction"}); 5798 RT.emitCriticalRegion( 5799 CGF, Name, 5800 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5801 Action.Enter(CGF); 5802 emitReductionCombiner(CGF, E); 5803 }, 5804 Loc); 5805 }; 5806 if ((*IPriv)->getType()->isArrayType()) { 5807 const auto *LHSVar = 5808 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5809 const auto *RHSVar = 5810 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5811 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5812 CritRedGen); 5813 } else { 5814 CritRedGen(CGF, nullptr, nullptr, nullptr); 5815 } 5816 } 5817 ++ILHS; 5818 ++IRHS; 5819 ++IPriv; 5820 } 5821 }; 5822 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5823 if (!WithNowait) { 5824 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5825 llvm::Value *EndArgs[] = { 5826 IdentTLoc, // ident_t *<loc> 5827 ThreadId, // i32 <gtid> 5828 Lock // kmp_critical_name *&<lock> 5829 }; 5830 CommonActionTy Action(nullptr, llvm::None, 5831 OMPBuilder.getOrCreateRuntimeFunction( 5832 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5833 EndArgs); 5834 AtomicRCG.setAction(Action); 5835 AtomicRCG(CGF); 5836 } else { 5837 AtomicRCG(CGF); 5838 } 5839 5840 CGF.EmitBranch(DefaultBB); 5841 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5842 } 5843 5844 /// Generates unique name for artificial threadprivate variables. 5845 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  // The raw begin-location encoding disambiguates distinct declarations that
  // share a name (e.g. shadowed locals).
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // 'restrict' on both params: the private item and the original item never
  // alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer does not reference the original item; pass a null lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
///
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) for items without cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; //
  // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // Null fini pointer means the item needs no cleanups.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (delayed) allocation from the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the runtime call finalizing a task-modifier reduction region.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (i.e. Sizes.second holds a runtime size value).
  if (Sizes.second) {
    // Publish the runtime size through an artificial threadprivate so the
    // generated init/comb/fini functions can read it back.
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the thread-specific instance of a task reduction
/// item, obtained from the runtime.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits code for the 'taskwait' directive, via the OMPIRBuilder when it is
/// enabled, otherwise via a direct __kmpc_omp_taskwait call.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits an inlined (non-outlined) OpenMP region for directives like 'for',
/// 'sections' or 'atomic' that do not need a separate outlined function.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // critical/master/masked regions keep the surrounding task context.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds as understood by the libomp runtime entry points
/// (__kmpc_cancel / __kmpc_cancellationpoint).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's
/// RTCancelKind encoding.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// Pre/post action that brackets a target region's codegen with allocator
/// initialization (on Enter) and destruction (on Exit) for every
/// (allocator, traits) pair collected from uses_allocators clauses.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // Pairs of (allocator expr, allocator-traits expr); storage is owned by the
  // caller that built the list.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emit __kmpc_init_allocator for each pair before the region body.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emit __kmpc_destroy_allocator for each allocator after the region body.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emit the outlined function for a target region, wrapping the region's
/// codegen in a uses_allocators init/fini action when the directive carries
/// uses_allocators clauses with traits.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  // Collect (allocator, traits) pairs; entries without traits need no
  // runtime initialization and are skipped here.
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emit the call to __kmpc_init_allocator for one uses_allocators entry and
/// store the resulting allocator handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the (constant) array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array lvalue as void* for the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the allocator variable itself, then write the
  // handle (converted from void* to the variable's type) into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit the call to __kmpc_destroy_allocator for one uses_allocators entry,
/// loading the allocator handle (converted to void*) from its variable.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    // Device and file IDs are rendered in hex, matching the naming scheme
    // described above.
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into a function with the computed name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the outlined function itself (cast to i8*).
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a distinct constant i8 global ("<name>.region_id").
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Peel ignorable statements off \p Body and return its single "interesting"
/// child statement, or nullptr if there is more than one (or none).
/// Trivial expressions, asm/null statements, flush/barrier/taskyield
/// directives, and certain declarations are treated as ignorable.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations with no codegen impact are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals do not count as real children.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep descending through nested containers of the single child.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a teams construct nested directly inside.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Emit the nested num_teams expression in the captured context.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: 0 (runtime default).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single analyzable child: let the caller decide.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: num_teams, if any, is on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads contributed by a parallel/simd construct
/// found as the single child of \p CS, clamped to \p DefaultThreadLimitVal
/// when that is non-null. Falls back to \p DefaultThreadLimitVal (or 0,
/// meaning "runtime decides") when no such construct is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (or has no modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel -> 1 thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any captured pre-init declarations the condition needs.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the thread limit: min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.
/// Inspect the thread_limit clause associated with a teams construct combined
/// or closely nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from nested constructs.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the nested thread_limit expression in the captured context.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any captured pre-init declarations the expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) child, descend one more level before
      // looking for a distribute construct.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause that applies to 'parallel' (or has no modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serialized parallel -> 1 thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
7098 OMP_MAP_PRIVATE = 0x80, 7099 /// Pass the element to the device by value. 7100 OMP_MAP_LITERAL = 0x100, 7101 /// Implicit map 7102 OMP_MAP_IMPLICIT = 0x200, 7103 /// Close is a hint to the runtime to allocate memory close to 7104 /// the target device. 7105 OMP_MAP_CLOSE = 0x400, 7106 /// 0x800 is reserved for compatibility with XLC. 7107 /// Produce a runtime error if the data is not already allocated. 7108 OMP_MAP_PRESENT = 0x1000, 7109 /// Signal that the runtime library should use args as an array of 7110 /// descriptor_dim pointers and use args_size as dims. Used when we have 7111 /// non-contiguous list items in target update directive 7112 OMP_MAP_NON_CONTIG = 0x100000000000, 7113 /// The 16 MSBs of the flags indicate whether the entry is member of some 7114 /// struct/class. 7115 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7116 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7117 }; 7118 7119 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7120 static unsigned getFlagMemberOffset() { 7121 unsigned Offset = 0; 7122 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7123 Remain = Remain >> 1) 7124 Offset++; 7125 return Offset; 7126 } 7127 7128 /// Class that holds debugging information for a data mapping to be passed to 7129 /// the runtime library. 7130 class MappingExprInfo { 7131 /// The variable declaration used for the data mapping. 7132 const ValueDecl *MapDecl = nullptr; 7133 /// The original expression used in the map clause, or null if there is 7134 /// none. 7135 const Expr *MapExpr = nullptr; 7136 7137 public: 7138 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7139 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7140 7141 const ValueDecl *getMapDecl() const { return MapDecl; } 7142 const Expr *getMapExpr() const { return MapExpr; } 7143 }; 7144 7145 /// Class that associates information with a base pointer to be passed to the 7146 /// runtime library. 
7147 class BasePointerInfo { 7148 /// The base pointer. 7149 llvm::Value *Ptr = nullptr; 7150 /// The base declaration that refers to this device pointer, or null if 7151 /// there is none. 7152 const ValueDecl *DevPtrDecl = nullptr; 7153 7154 public: 7155 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7156 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7157 llvm::Value *operator*() const { return Ptr; } 7158 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7159 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7160 }; 7161 7162 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7163 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7164 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7165 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7166 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7167 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7168 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7169 7170 /// This structure contains combined information generated for mappable 7171 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7172 /// mappers, and non-contiguous information. 7173 struct MapCombinedInfoTy { 7174 struct StructNonContiguousInfo { 7175 bool IsNonContiguous = false; 7176 MapDimArrayTy Dims; 7177 MapNonContiguousArrayTy Offsets; 7178 MapNonContiguousArrayTy Counts; 7179 MapNonContiguousArrayTy Strides; 7180 }; 7181 MapExprsArrayTy Exprs; 7182 MapBaseValuesArrayTy BasePointers; 7183 MapValuesArrayTy Pointers; 7184 MapValuesArrayTy Sizes; 7185 MapFlagsArrayTy Types; 7186 MapMappersArrayTy Mappers; 7187 StructNonContiguousInfo NonContigInfo; 7188 7189 /// Append arrays in \a CurInfo. 
7190 void append(MapCombinedInfoTy &CurInfo) { 7191 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7192 BasePointers.append(CurInfo.BasePointers.begin(), 7193 CurInfo.BasePointers.end()); 7194 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7195 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7196 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7197 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7198 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7199 CurInfo.NonContigInfo.Dims.end()); 7200 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7201 CurInfo.NonContigInfo.Offsets.end()); 7202 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7203 CurInfo.NonContigInfo.Counts.end()); 7204 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7205 CurInfo.NonContigInfo.Strides.end()); 7206 } 7207 }; 7208 7209 /// Map between a struct and the its lowest & highest elements which have been 7210 /// mapped. 7211 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7212 /// HE(FieldIndex, Pointer)} 7213 struct StructRangeInfoTy { 7214 MapCombinedInfoTy PreliminaryMapData; 7215 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7216 0, Address::invalid()}; 7217 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7218 0, Address::invalid()}; 7219 Address Base = Address::invalid(); 7220 Address LB = Address::invalid(); 7221 bool IsArraySection = false; 7222 bool HasCompleteRecord = false; 7223 }; 7224 7225 private: 7226 /// Kind that defines how a device pointer has to be returned. 
  /// Aggregates everything known about a single mappable item: its expression
  /// components plus the map/motion type and modifiers it was specified with.
  struct MapInfo {
    /// Component list describing the mapped expression (base to final member).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close, present).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if a device pointer must be returned for this entry.
    bool ReturnDevicePointer = false;
    /// True if the mapping was not written by the user explicitly.
    bool IsImplicit = false;
    /// User-defined mapper associated with this item, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original clause expression, kept for diagnostics/debug info.
    const Expr *VarRef = nullptr;
    /// True when the entry comes from use_device_addr (vs use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
7273 CodeGenFunction &CGF; 7274 7275 /// Set of all first private variables in the current directive. 7276 /// bool data is set to true if the variable is implicitly marked as 7277 /// firstprivate, false otherwise. 7278 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7279 7280 /// Map between device pointer declarations and their expression components. 7281 /// The key value for declarations in 'this' is null. 7282 llvm::DenseMap< 7283 const ValueDecl *, 7284 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7285 DevPointersMap; 7286 7287 llvm::Value *getExprTypeSize(const Expr *E) const { 7288 QualType ExprTy = E->getType().getCanonicalType(); 7289 7290 // Calculate the size for array shaping expression. 7291 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7292 llvm::Value *Size = 7293 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7294 for (const Expr *SE : OAE->getDimensions()) { 7295 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7296 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7297 CGF.getContext().getSizeType(), 7298 SE->getExprLoc()); 7299 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7300 } 7301 return Size; 7302 } 7303 7304 // Reference types are ignored for mapping purposes. 7305 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7306 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7307 7308 // Given that an array section is considered a built-in type, we need to 7309 // do the calculation based on the length of the section instead of relying 7310 // on CGF.getTypeSize(E->getType()). 7311 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7312 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7313 OAE->getBase()->IgnoreParenImpCasts()) 7314 .getCanonicalType(); 7315 7316 // If there is no length associated with the expression and lower bound is 7317 // not specified too, that means we are using the whole length of the 7318 // base. 
7319 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7320 !OAE->getLowerBound()) 7321 return CGF.getTypeSize(BaseTy); 7322 7323 llvm::Value *ElemSize; 7324 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7325 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7326 } else { 7327 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7328 assert(ATy && "Expecting array type if not a pointer type."); 7329 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7330 } 7331 7332 // If we don't have a length at this point, that is because we have an 7333 // array section with a single element. 7334 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7335 return ElemSize; 7336 7337 if (const Expr *LenExpr = OAE->getLength()) { 7338 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7339 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7340 CGF.getContext().getSizeType(), 7341 LenExpr->getExprLoc()); 7342 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7343 } 7344 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7345 OAE->getLowerBound() && "expected array_section[lb:]."); 7346 // Size = sizetype - lb * elemtype; 7347 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7348 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7349 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7350 CGF.getContext().getSizeType(), 7351 OAE->getLowerBound()->getExprLoc()); 7352 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7353 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7354 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7355 LengthVal = CGF.Builder.CreateSelect( 7356 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7357 return LengthVal; 7358 } 7359 return CGF.getTypeSize(ExprTy); 7360 } 7361 7362 /// Return the corresponding bits for a given map clause modifier. 
Add 7363 /// a flag marking the map as a pointer if requested. Add a flag marking the 7364 /// map as the first one of a series of maps that relate to the same map 7365 /// expression. 7366 OpenMPOffloadMappingFlags getMapTypeBits( 7367 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7368 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7369 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7370 OpenMPOffloadMappingFlags Bits = 7371 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7372 switch (MapType) { 7373 case OMPC_MAP_alloc: 7374 case OMPC_MAP_release: 7375 // alloc and release is the default behavior in the runtime library, i.e. 7376 // if we don't pass any bits alloc/release that is what the runtime is 7377 // going to do. Therefore, we don't need to signal anything for these two 7378 // type modifiers. 7379 break; 7380 case OMPC_MAP_to: 7381 Bits |= OMP_MAP_TO; 7382 break; 7383 case OMPC_MAP_from: 7384 Bits |= OMP_MAP_FROM; 7385 break; 7386 case OMPC_MAP_tofrom: 7387 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7388 break; 7389 case OMPC_MAP_delete: 7390 Bits |= OMP_MAP_DELETE; 7391 break; 7392 case OMPC_MAP_unknown: 7393 llvm_unreachable("Unexpected map type!"); 7394 } 7395 if (AddPtrFlag) 7396 Bits |= OMP_MAP_PTR_AND_OBJ; 7397 if (AddIsTargetParamFlag) 7398 Bits |= OMP_MAP_TARGET_PARAM; 7399 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7400 != MapModifiers.end()) 7401 Bits |= OMP_MAP_ALWAYS; 7402 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7403 != MapModifiers.end()) 7404 Bits |= OMP_MAP_CLOSE; 7405 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7406 MapModifiers.end() || 7407 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7408 MotionModifiers.end()) 7409 Bits |= OMP_MAP_PRESENT; 7410 if (IsNonContiguous) 7411 Bits |= OMP_MAP_NON_CONTIG; 7412 return Bits; 7413 } 7414 7415 /// Return true if the provided expression is a final array section. 
A 7416 /// final array section, is one whose length can't be proved to be one. 7417 bool isFinalArraySectionExpression(const Expr *E) const { 7418 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7419 7420 // It is not an array section and therefore not a unity-size one. 7421 if (!OASE) 7422 return false; 7423 7424 // An array section with no colon always refer to a single element. 7425 if (OASE->getColonLocFirst().isInvalid()) 7426 return false; 7427 7428 const Expr *Length = OASE->getLength(); 7429 7430 // If we don't have a length we have to check if the array has size 1 7431 // for this dimension. Also, we should always expect a length if the 7432 // base type is pointer. 7433 if (!Length) { 7434 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7435 OASE->getBase()->IgnoreParenImpCasts()) 7436 .getCanonicalType(); 7437 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7438 return ATy->getSize().getSExtValue() != 1; 7439 // If we don't have a constant dimension length, we have to consider 7440 // the current section as having any size, so it is not necessarily 7441 // unitary. If it happen to be unity size, that's user fault. 7442 return true; 7443 } 7444 7445 // Check if the length evaluates to 1. 7446 Expr::EvalResult Result; 7447 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7448 return true; // Can have more that size 1. 7449 7450 llvm::APSInt ConstLength = Result.Val.getInt(); 7451 return ConstLength.getSExtValue() != 1; 7452 } 7453 7454 /// Generate the base pointers, section pointers, sizes, map type bits, and 7455 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7456 /// map type, map or motion modifiers, and expression components. 7457 /// \a IsFirstComponent should be set to true if the provided set of 7458 /// components is the first associated with a capture. 
7459 void generateInfoForComponentList( 7460 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7461 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7462 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7463 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7464 bool IsFirstComponentList, bool IsImplicit, 7465 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7466 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7467 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7468 OverlappedElements = llvm::None) const { 7469 // The following summarizes what has to be generated for each map and the 7470 // types below. The generated information is expressed in this order: 7471 // base pointer, section pointer, size, flags 7472 // (to add to the ones that come from the map type and modifier). 7473 // 7474 // double d; 7475 // int i[100]; 7476 // float *p; 7477 // 7478 // struct S1 { 7479 // int i; 7480 // float f[50]; 7481 // } 7482 // struct S2 { 7483 // int i; 7484 // float f[50]; 7485 // S1 s; 7486 // double *p; 7487 // struct S2 *ps; 7488 // int &ref; 7489 // } 7490 // S2 s; 7491 // S2 *ps; 7492 // 7493 // map(d) 7494 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7495 // 7496 // map(i) 7497 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7498 // 7499 // map(i[1:23]) 7500 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7501 // 7502 // map(p) 7503 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7504 // 7505 // map(p[1:24]) 7506 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7507 // in unified shared memory mode or for local pointers 7508 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7509 // 7510 // map(s) 7511 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7512 // 7513 // map(s.i) 7514 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7515 // 7516 // map(s.s.f) 7517 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7518 // 7519 // map(s.p) 7520 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7521 // 7522 // map(to: s.p[:22]) 7523 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7524 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7525 // &(s.p), &(s.p[0]), 22*sizeof(double), 7526 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7527 // (*) alloc space for struct members, only this is a target parameter 7528 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7529 // optimizes this entry out, same in the examples below) 7530 // (***) map the pointee (map: to) 7531 // 7532 // map(to: s.ref) 7533 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7534 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7535 // (*) alloc space for struct members, only this is a target parameter 7536 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7537 // optimizes this entry out, same in the examples below) 7538 // (***) map the pointee (map: to) 7539 // 7540 // map(s.ps) 7541 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7542 // 7543 // map(from: s.ps->s.i) 7544 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7545 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7546 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7547 // 7548 // map(to: s.ps->ps) 7549 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7550 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7551 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7552 // 7553 // map(s.ps->ps->ps) 7554 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7555 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7556 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7557 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7558 // 7559 // map(to: s.ps->ps->s.f[:22]) 7560 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7561 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7562 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7563 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7564 // 7565 // map(ps) 7566 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7567 // 7568 // map(ps->i) 7569 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7570 // 7571 // map(ps->s.f) 7572 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7573 // 7574 // map(from: ps->p) 7575 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7576 // 7577 // map(to: ps->p[:22]) 7578 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7579 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7580 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7581 // 7582 // map(ps->ps) 7583 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7584 // 7585 // map(from: ps->ps->s.i) 7586 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7587 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7588 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7589 // 7590 // map(from: ps->ps->ps) 7591 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7592 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7593 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7594 // 7595 // map(ps->ps->ps->ps) 7596 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7597 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7598 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7599 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7600 // 7601 // map(to: ps->ps->ps->s.f[:22]) 7602 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7603 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7604 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7605 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7606 // 7607 // map(to: s.f[:22]) map(from: s.p[:33]) 7608 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7609 // sizeof(double*) (**), TARGET_PARAM 7610 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7611 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7612 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7613 // (*) allocate contiguous space needed to fit all mapped members even if 7614 // we allocate space for members not mapped (in this example, 7615 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7616 // them as well because they fall between &s.f[0] and &s.p) 7617 // 7618 // map(from: s.f[:22]) map(to: ps->p[:33]) 7619 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7620 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7621 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7622 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7623 // (*) the struct this entry pertains to is the 2nd element in the list of 7624 // arguments, hence MEMBER_OF(2) 7625 // 7626 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7627 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7628 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7629 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7630 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7631 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7632 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7633 // (*) the struct this entry pertains to is the 4th element in the list 7634 // of arguments, hence MEMBER_OF(4) 7635 7636 // Track if the map information being generated is the first for a capture. 7637 bool IsCaptureFirstInfo = IsFirstComponentList; 7638 // When the variable is on a declare target link or in a to clause with 7639 // unified memory, a reference is needed to hold the host/device address 7640 // of the variable. 7641 bool RequiresReference = false; 7642 7643 // Scan the components from the base to the complete expression. 
7644 auto CI = Components.rbegin(); 7645 auto CE = Components.rend(); 7646 auto I = CI; 7647 7648 // Track if the map information being generated is the first for a list of 7649 // components. 7650 bool IsExpressionFirstInfo = true; 7651 bool FirstPointerInComplexData = false; 7652 Address BP = Address::invalid(); 7653 const Expr *AssocExpr = I->getAssociatedExpression(); 7654 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7655 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7656 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7657 7658 if (isa<MemberExpr>(AssocExpr)) { 7659 // The base is the 'this' pointer. The content of the pointer is going 7660 // to be the base of the field being mapped. 7661 BP = CGF.LoadCXXThisAddress(); 7662 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7663 (OASE && 7664 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7665 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7666 } else if (OAShE && 7667 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7668 BP = Address( 7669 CGF.EmitScalarExpr(OAShE->getBase()), 7670 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7671 } else { 7672 // The base is the reference to the variable. 7673 // BP = &Var. 7674 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7675 if (const auto *VD = 7676 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7677 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7678 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7679 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7680 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7681 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7682 RequiresReference = true; 7683 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7684 } 7685 } 7686 } 7687 7688 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7689 // the last component), the base has to be the pointer itself, not its 7690 // reference. References are ignored for mapping purposes. 7691 QualType Ty = 7692 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7693 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7694 // No need to generate individual map information for the pointer, it 7695 // can be associated with the combined storage if shared memory mode is 7696 // active or the base declaration is not global variable. 7697 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7698 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7699 !VD || VD->hasLocalStorage()) 7700 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7701 else 7702 FirstPointerInComplexData = true; 7703 ++I; 7704 } 7705 } 7706 7707 // Track whether a component of the list should be marked as MEMBER_OF some 7708 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7709 // in a component list should be marked as MEMBER_OF, all subsequent entries 7710 // do not belong to the base struct. E.g. 7711 // struct S2 s; 7712 // s.ps->ps->ps->f[:] 7713 // (1) (2) (3) (4) 7714 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7715 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7716 // is the pointee of ps(2) which is not member of struct s, so it should not 7717 // be marked as such (it is still PTR_AND_OBJ). 7718 // The variable is initialized to false so that PTR_AND_OBJ entries which 7719 // are not struct members are not considered (e.g. array of pointers to 7720 // data). 7721 bool ShouldBeMemberOf = false; 7722 7723 // Variable keeping track of whether or not we have encountered a component 7724 // in the component list which is a member expression. 
Useful when we have a 7725 // pointer or a final array section, in which case it is the previous 7726 // component in the list which tells us whether we have a member expression. 7727 // E.g. X.f[:] 7728 // While processing the final array section "[:]" it is "f" which tells us 7729 // whether we are dealing with a member of a declared struct. 7730 const MemberExpr *EncounteredME = nullptr; 7731 7732 // Track for the total number of dimension. Start from one for the dummy 7733 // dimension. 7734 uint64_t DimSize = 1; 7735 7736 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7737 bool IsPrevMemberReference = false; 7738 7739 for (; I != CE; ++I) { 7740 // If the current component is member of a struct (parent struct) mark it. 7741 if (!EncounteredME) { 7742 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7743 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7744 // as MEMBER_OF the parent struct. 7745 if (EncounteredME) { 7746 ShouldBeMemberOf = true; 7747 // Do not emit as complex pointer if this is actually not array-like 7748 // expression. 7749 if (FirstPointerInComplexData) { 7750 QualType Ty = std::prev(I) 7751 ->getAssociatedDeclaration() 7752 ->getType() 7753 .getNonReferenceType(); 7754 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7755 FirstPointerInComplexData = false; 7756 } 7757 } 7758 } 7759 7760 auto Next = std::next(I); 7761 7762 // We need to generate the addresses and sizes if this is the last 7763 // component, if the component is a pointer or if it is an array section 7764 // whose length can't be proved to be one. If this is a pointer, it 7765 // becomes the base address for the following components. 7766 7767 // A final array section, is one whose length can't be proved to be one. 7768 // If the map item is non-contiguous then we don't treat any array section 7769 // as final array section. 
7770 bool IsFinalArraySection = 7771 !IsNonContiguous && 7772 isFinalArraySectionExpression(I->getAssociatedExpression()); 7773 7774 // If we have a declaration for the mapping use that, otherwise use 7775 // the base declaration of the map clause. 7776 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7777 ? I->getAssociatedDeclaration() 7778 : BaseDecl; 7779 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7780 : MapExpr; 7781 7782 // Get information on whether the element is a pointer. Have to do a 7783 // special treatment for array sections given that they are built-in 7784 // types. 7785 const auto *OASE = 7786 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7787 const auto *OAShE = 7788 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7789 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7790 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7791 bool IsPointer = 7792 OAShE || 7793 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7794 .getCanonicalType() 7795 ->isAnyPointerType()) || 7796 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7797 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7798 MapDecl && 7799 MapDecl->getType()->isLValueReferenceType(); 7800 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7801 7802 if (OASE) 7803 ++DimSize; 7804 7805 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7806 IsFinalArraySection) { 7807 // If this is not the last component, we expect the pointer to be 7808 // associated with an array expression or member expression. 
7809 assert((Next == CE || 7810 isa<MemberExpr>(Next->getAssociatedExpression()) || 7811 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7812 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7813 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7814 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7815 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7816 "Unexpected expression"); 7817 7818 Address LB = Address::invalid(); 7819 Address LowestElem = Address::invalid(); 7820 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7821 const MemberExpr *E) { 7822 const Expr *BaseExpr = E->getBase(); 7823 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7824 // scalar. 7825 LValue BaseLV; 7826 if (E->isArrow()) { 7827 LValueBaseInfo BaseInfo; 7828 TBAAAccessInfo TBAAInfo; 7829 Address Addr = 7830 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7831 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7832 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7833 } else { 7834 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7835 } 7836 return BaseLV; 7837 }; 7838 if (OAShE) { 7839 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7840 CGF.getContext().getTypeAlignInChars( 7841 OAShE->getBase()->getType())); 7842 } else if (IsMemberReference) { 7843 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7844 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7845 LowestElem = CGF.EmitLValueForFieldInitialization( 7846 BaseLVal, cast<FieldDecl>(MapDecl)) 7847 .getAddress(CGF); 7848 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7849 .getAddress(CGF); 7850 } else { 7851 LowestElem = LB = 7852 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7853 .getAddress(CGF); 7854 } 7855 7856 // If this component is a pointer inside the base struct then we don't 7857 // need to create any entry for it - it will be combined with the object 7858 // it is 
pointing to into a single PTR_AND_OBJ entry. 7859 bool IsMemberPointerOrAddr = 7860 EncounteredME && 7861 (((IsPointer || ForDeviceAddr) && 7862 I->getAssociatedExpression() == EncounteredME) || 7863 (IsPrevMemberReference && !IsPointer) || 7864 (IsMemberReference && Next != CE && 7865 !Next->getAssociatedExpression()->getType()->isPointerType())); 7866 if (!OverlappedElements.empty() && Next == CE) { 7867 // Handle base element with the info for overlapped elements. 7868 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7869 assert(!IsPointer && 7870 "Unexpected base element with the pointer type."); 7871 // Mark the whole struct as the struct that requires allocation on the 7872 // device. 7873 PartialStruct.LowestElem = {0, LowestElem}; 7874 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7875 I->getAssociatedExpression()->getType()); 7876 Address HB = CGF.Builder.CreateConstGEP( 7877 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7878 CGF.VoidPtrTy), 7879 TypeSize.getQuantity() - 1); 7880 PartialStruct.HighestElem = { 7881 std::numeric_limits<decltype( 7882 PartialStruct.HighestElem.first)>::max(), 7883 HB}; 7884 PartialStruct.Base = BP; 7885 PartialStruct.LB = LB; 7886 assert( 7887 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7888 "Overlapped elements must be used only once for the variable."); 7889 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7890 // Emit data for non-overlapped data. 7891 OpenMPOffloadMappingFlags Flags = 7892 OMP_MAP_MEMBER_OF | 7893 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7894 /*AddPtrFlag=*/false, 7895 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7896 llvm::Value *Size = nullptr; 7897 // Do bitcopy of all non-overlapped structure elements. 
7898 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7899 Component : OverlappedElements) { 7900 Address ComponentLB = Address::invalid(); 7901 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7902 Component) { 7903 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7904 const auto *FD = dyn_cast<FieldDecl>(VD); 7905 if (FD && FD->getType()->isLValueReferenceType()) { 7906 const auto *ME = 7907 cast<MemberExpr>(MC.getAssociatedExpression()); 7908 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7909 ComponentLB = 7910 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7911 .getAddress(CGF); 7912 } else { 7913 ComponentLB = 7914 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7915 .getAddress(CGF); 7916 } 7917 Size = CGF.Builder.CreatePtrDiff( 7918 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7919 CGF.EmitCastToVoidPtr(LB.getPointer())); 7920 break; 7921 } 7922 } 7923 assert(Size && "Failed to determine structure size"); 7924 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7925 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7926 CombinedInfo.Pointers.push_back(LB.getPointer()); 7927 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7928 Size, CGF.Int64Ty, /*isSigned=*/true)); 7929 CombinedInfo.Types.push_back(Flags); 7930 CombinedInfo.Mappers.push_back(nullptr); 7931 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7932 : 1); 7933 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7934 } 7935 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7936 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7937 CombinedInfo.Pointers.push_back(LB.getPointer()); 7938 Size = CGF.Builder.CreatePtrDiff( 7939 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7940 CGF.EmitCastToVoidPtr(LB.getPointer())); 7941 CombinedInfo.Sizes.push_back( 7942 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7943 CombinedInfo.Types.push_back(Flags); 7944 CombinedInfo.Mappers.push_back(nullptr); 7945 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7946 : 1); 7947 break; 7948 } 7949 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7950 if (!IsMemberPointerOrAddr || 7951 (Next == CE && MapType != OMPC_MAP_unknown)) { 7952 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7953 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7954 CombinedInfo.Pointers.push_back(LB.getPointer()); 7955 CombinedInfo.Sizes.push_back( 7956 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7957 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7958 : 1); 7959 7960 // If Mapper is valid, the last component inherits the mapper. 7961 bool HasMapper = Mapper && Next == CE; 7962 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7963 7964 // We need to add a pointer flag for each map that comes from the 7965 // same expression except for the first one. We also need to signal 7966 // this map is the first one that relates with the current capture 7967 // (there is a set of entries for each capture). 
7968 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7969 MapType, MapModifiers, MotionModifiers, IsImplicit, 7970 !IsExpressionFirstInfo || RequiresReference || 7971 FirstPointerInComplexData || IsMemberReference, 7972 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7973 7974 if (!IsExpressionFirstInfo || IsMemberReference) { 7975 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7976 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7977 if (IsPointer || (IsMemberReference && Next != CE)) 7978 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7979 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7980 7981 if (ShouldBeMemberOf) { 7982 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7983 // should be later updated with the correct value of MEMBER_OF. 7984 Flags |= OMP_MAP_MEMBER_OF; 7985 // From now on, all subsequent PTR_AND_OBJ entries should not be 7986 // marked as MEMBER_OF. 7987 ShouldBeMemberOf = false; 7988 } 7989 } 7990 7991 CombinedInfo.Types.push_back(Flags); 7992 } 7993 7994 // If we have encountered a member expression so far, keep track of the 7995 // mapped member. If the parent is "*this", then the value declaration 7996 // is nullptr. 
7997 if (EncounteredME) { 7998 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7999 unsigned FieldIndex = FD->getFieldIndex(); 8000 8001 // Update info about the lowest and highest elements for this struct 8002 if (!PartialStruct.Base.isValid()) { 8003 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8004 if (IsFinalArraySection) { 8005 Address HB = 8006 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 8007 .getAddress(CGF); 8008 PartialStruct.HighestElem = {FieldIndex, HB}; 8009 } else { 8010 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8011 } 8012 PartialStruct.Base = BP; 8013 PartialStruct.LB = BP; 8014 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8015 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8016 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8017 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8018 } 8019 } 8020 8021 // Need to emit combined struct for array sections. 8022 if (IsFinalArraySection || IsNonContiguous) 8023 PartialStruct.IsArraySection = true; 8024 8025 // If we have a final array section, we are done with this expression. 8026 if (IsFinalArraySection) 8027 break; 8028 8029 // The pointer becomes the base for the next element. 8030 if (Next != CE) 8031 BP = IsMemberReference ? LowestElem : LB; 8032 8033 IsExpressionFirstInfo = false; 8034 IsCaptureFirstInfo = false; 8035 FirstPointerInComplexData = false; 8036 IsPrevMemberReference = IsMemberReference; 8037 } else if (FirstPointerInComplexData) { 8038 QualType Ty = Components.rbegin() 8039 ->getAssociatedDeclaration() 8040 ->getType() 8041 .getNonReferenceType(); 8042 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8043 FirstPointerInComplexData = false; 8044 } 8045 } 8046 // If ran into the whole component - allocate the space for the whole 8047 // record. 
8048 if (!EncounteredME) 8049 PartialStruct.HasCompleteRecord = true; 8050 8051 if (!IsNonContiguous) 8052 return; 8053 8054 const ASTContext &Context = CGF.getContext(); 8055 8056 // For supporting stride in array section, we need to initialize the first 8057 // dimension size as 1, first offset as 0, and first count as 1 8058 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8059 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8060 MapValuesArrayTy CurStrides; 8061 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8062 uint64_t ElementTypeSize; 8063 8064 // Collect Size information for each dimension and get the element size as 8065 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8066 // should be [10, 10] and the first stride is 4 btyes. 8067 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8068 Components) { 8069 const Expr *AssocExpr = Component.getAssociatedExpression(); 8070 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8071 8072 if (!OASE) 8073 continue; 8074 8075 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8076 auto *CAT = Context.getAsConstantArrayType(Ty); 8077 auto *VAT = Context.getAsVariableArrayType(Ty); 8078 8079 // We need all the dimension size except for the last dimension. 8080 assert((VAT || CAT || &Component == &*Components.begin()) && 8081 "Should be either ConstantArray or VariableArray if not the " 8082 "first Component"); 8083 8084 // Get element size if CurStrides is empty. 
8085 if (CurStrides.empty()) { 8086 const Type *ElementType = nullptr; 8087 if (CAT) 8088 ElementType = CAT->getElementType().getTypePtr(); 8089 else if (VAT) 8090 ElementType = VAT->getElementType().getTypePtr(); 8091 else 8092 assert(&Component == &*Components.begin() && 8093 "Only expect pointer (non CAT or VAT) when this is the " 8094 "first Component"); 8095 // If ElementType is null, then it means the base is a pointer 8096 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8097 // for next iteration. 8098 if (ElementType) { 8099 // For the case that having pointer as base, we need to remove one 8100 // level of indirection. 8101 if (&Component != &*Components.begin()) 8102 ElementType = ElementType->getPointeeOrArrayElementType(); 8103 ElementTypeSize = 8104 Context.getTypeSizeInChars(ElementType).getQuantity(); 8105 CurStrides.push_back( 8106 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8107 } 8108 } 8109 // Get dimension value except for the last dimension since we don't need 8110 // it. 8111 if (DimSizes.size() < Components.size() - 1) { 8112 if (CAT) 8113 DimSizes.push_back(llvm::ConstantInt::get( 8114 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8115 else if (VAT) 8116 DimSizes.push_back(CGF.Builder.CreateIntCast( 8117 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8118 /*IsSigned=*/false)); 8119 } 8120 } 8121 8122 // Skip the dummy dimension since we have already have its information. 8123 auto DI = DimSizes.begin() + 1; 8124 // Product of dimension. 8125 llvm::Value *DimProd = 8126 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8127 8128 // Collect info for non-contiguous. Notice that offset, count, and stride 8129 // are only meaningful for array-section, so we insert a null for anything 8130 // other than array-section. 8131 // Also, the size of offset, count, and stride are not the same as 8132 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8133 // count, and stride are the same as the number of non-contiguous 8134 // declaration in target update to/from clause. 8135 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8136 Components) { 8137 const Expr *AssocExpr = Component.getAssociatedExpression(); 8138 8139 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8140 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8141 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8142 /*isSigned=*/false); 8143 CurOffsets.push_back(Offset); 8144 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8145 CurStrides.push_back(CurStrides.back()); 8146 continue; 8147 } 8148 8149 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8150 8151 if (!OASE) 8152 continue; 8153 8154 // Offset 8155 const Expr *OffsetExpr = OASE->getLowerBound(); 8156 llvm::Value *Offset = nullptr; 8157 if (!OffsetExpr) { 8158 // If offset is absent, then we just set it to zero. 8159 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8160 } else { 8161 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8162 CGF.Int64Ty, 8163 /*isSigned=*/false); 8164 } 8165 CurOffsets.push_back(Offset); 8166 8167 // Count 8168 const Expr *CountExpr = OASE->getLength(); 8169 llvm::Value *Count = nullptr; 8170 if (!CountExpr) { 8171 // In Clang, once a high dimension is an array section, we construct all 8172 // the lower dimension as array section, however, for case like 8173 // arr[0:2][2], Clang construct the inner dimension as an array section 8174 // but it actually is not in an array section form according to spec. 8175 if (!OASE->getColonLocFirst().isValid() && 8176 !OASE->getColonLocSecond().isValid()) { 8177 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8178 } else { 8179 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8180 // When the length is absent it defaults to ⌈(size − 8181 // lower-bound)/stride⌉, where size is the size of the array 8182 // dimension. 8183 const Expr *StrideExpr = OASE->getStride(); 8184 llvm::Value *Stride = 8185 StrideExpr 8186 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8187 CGF.Int64Ty, /*isSigned=*/false) 8188 : nullptr; 8189 if (Stride) 8190 Count = CGF.Builder.CreateUDiv( 8191 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8192 else 8193 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8194 } 8195 } else { 8196 Count = CGF.EmitScalarExpr(CountExpr); 8197 } 8198 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8199 CurCounts.push_back(Count); 8200 8201 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8202 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8203 // Offset Count Stride 8204 // D0 0 1 4 (int) <- dummy dimension 8205 // D1 0 2 8 (2 * (1) * 4) 8206 // D2 1 2 20 (1 * (1 * 5) * 4) 8207 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8208 const Expr *StrideExpr = OASE->getStride(); 8209 llvm::Value *Stride = 8210 StrideExpr 8211 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8212 CGF.Int64Ty, /*isSigned=*/false) 8213 : nullptr; 8214 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8215 if (Stride) 8216 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8217 else 8218 CurStrides.push_back(DimProd); 8219 if (DI != DimSizes.end()) 8220 ++DI; 8221 } 8222 8223 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8224 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8225 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8226 } 8227 8228 /// Return the adjusted map modifiers if the declaration a capture refers to 8229 /// appears in a first-private clause. This is expected to be used only with 8230 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not first-private: default to a tofrom mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Encode \p Position (zero-based index of a base pointer in the combined
  /// info arrays) into the MEMBER_OF bit-field of the mapping flags. The
  /// field stores Position + 1, shifted into place by getFlagMemberOffset()
  /// bits.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Overwrite the MEMBER_OF bit-field of \p Flags with \p MemberOfFlag,
  /// except for PTR_AND_OBJ entries that do not carry the 0xFFFF placeholder
  /// in the MEMBER_OF field, which are left untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \p Layout the non-bitfield, non-zero-size fields of \p RD and
  /// of all its non-empty (direct and virtual) bases, recursively, in LLVM
  /// record-layout order. \p AsBase selects the base-subobject LLVM type of
  /// \p RD instead of its complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Sparse map from LLVM field index to the base class or field that
    // occupies that slot; unclaimed slots stay null and are skipped below.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip slots already claimed by a non-virtual base above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the layout in field-index order, recursing into base classes and
    // appending leaf fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo).
  /// Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Note: Total is not a real kind; it is only used to size the per-kind
    // vector of MapInfo lists below.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect info from 'map' clauses, classifying each as Present, Allocs,
    // or Other.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect info from 'to' clauses (treated as a 'to' map).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect info from 'from' clauses (treated as a 'from' map).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Handle each declaration at most once.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected entries, one declaration at a time, so that all
    // component lists of the same declaration are processed together.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for executable directives.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// \param CombinedInfo receives the new combined entry.
  /// \param CurTypes map flags of the entries already generated for the
  /// struct's members; updated in place with MEMBER_OF information.
  /// \param PartialStruct range info (lowest/highest mapped element) for the
  /// partially mapped struct.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that carries no MEMBER_OF placeholder and is not an
    // array section does not need a combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record, the combined entry covers the whole struct
    // starting at its base.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    // Only valid when this handler was constructed for a declare-mapper
    // directive (the executable-directive case uses generateAllInfo).
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// For each by-reference capture (and the captured 'this', if any) an
  /// implicit PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT entry is appended
  /// to \a CombinedInfo, and the field-address -> lambda-address relation is
  /// recorded in \a LambdaPointers for later MEMBER_OF fix-up.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured value is actually a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: intentionally shadows the outer VD (the lambda object).
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer (by copy): map the pointee via the loaded value;
        // size 0 means the runtime only records the association.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  /// Rewrites the MEMBER_OF field of every implicit lambda-capture entry
  /// (identified by the exact flag combination used above) so that it refers
  /// to the entry of its enclosing lambda object, found via \a LambdaPointers.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards: the parent lambda entry always precedes its captures.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every (components, map-type, modifiers, implicit, mapper,
    // var-ref) tuple from the map clauses that mention this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort so entries with the 'present' modifier come first, then
    // 'alloc' entries; relative order of equal entries is preserved.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list only (pairs are checked once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the base (reverse order) until they
        // diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Peel pointer/array types down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by declaration order of the diverging fields.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: map what the pointer points to, so load it.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the runtime descriptors ("descriptor_dim" arrays) for non-contiguous
/// map entries and store their addresses into the offload pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored innermost-first by the collector, so reverse.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
9277 llvm::Constant * 9278 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9279 MappableExprsHandler::MappingExprInfo &MapExprs) { 9280 llvm::Constant *SrcLocStr; 9281 if (!MapExprs.getMapDecl()) { 9282 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9283 } else { 9284 std::string ExprName = ""; 9285 if (MapExprs.getMapExpr()) { 9286 PrintingPolicy P(CGF.getContext().getLangOpts()); 9287 llvm::raw_string_ostream OS(ExprName); 9288 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9289 OS.flush(); 9290 } else { 9291 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9292 } 9293 9294 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9295 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9296 const char *FileName = PLoc.getFilename(); 9297 unsigned Line = PLoc.getLine(); 9298 unsigned Column = PLoc.getColumn(); 9299 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9300 Line, Column); 9301 } 9302 return SrcLocStr; 9303 } 9304 9305 /// Emit the arrays used to pass the captures and map information to the 9306 /// offloading runtime library. If there is no map or capture information, 9307 /// return nullptr by reference. 9308 static void emitOffloadingArrays( 9309 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9310 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9311 bool IsNonContiguous = false) { 9312 CodeGenModule &CGM = CGF.CGM; 9313 ASTContext &Ctx = CGF.getContext(); 9314 9315 // Reset the array information. 9316 Info.clearArrayInfo(); 9317 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9318 9319 if (Info.NumberOfPtrs) { 9320 // Detect if we have any capture size requiring runtime evaluation of the 9321 // size so that a constant array could be eventually used. 
9322 bool hasRuntimeEvaluationCaptureSize = false; 9323 for (llvm::Value *S : CombinedInfo.Sizes) 9324 if (!isa<llvm::Constant>(S)) { 9325 hasRuntimeEvaluationCaptureSize = true; 9326 break; 9327 } 9328 9329 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9330 QualType PointerArrayType = Ctx.getConstantArrayType( 9331 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9332 /*IndexTypeQuals=*/0); 9333 9334 Info.BasePointersArray = 9335 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9336 Info.PointersArray = 9337 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9338 Address MappersArray = 9339 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9340 Info.MappersArray = MappersArray.getPointer(); 9341 9342 // If we don't have any VLA types or other types that require runtime 9343 // evaluation, we can use a constant array for the map sizes, otherwise we 9344 // need to fill up the arrays as we do for the pointers. 9345 QualType Int64Ty = 9346 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9347 if (hasRuntimeEvaluationCaptureSize) { 9348 QualType SizeArrayType = Ctx.getConstantArrayType( 9349 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9350 /*IndexTypeQuals=*/0); 9351 Info.SizesArray = 9352 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9353 } else { 9354 // We expect all the sizes to be constant, so we collect them to create 9355 // a constant array. 
9356 SmallVector<llvm::Constant *, 16> ConstSizes; 9357 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9358 if (IsNonContiguous && 9359 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9360 ConstSizes.push_back(llvm::ConstantInt::get( 9361 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9362 } else { 9363 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9364 } 9365 } 9366 9367 auto *SizesArrayInit = llvm::ConstantArray::get( 9368 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9369 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9370 auto *SizesArrayGbl = new llvm::GlobalVariable( 9371 CGM.getModule(), SizesArrayInit->getType(), 9372 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9373 SizesArrayInit, Name); 9374 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9375 Info.SizesArray = SizesArrayGbl; 9376 } 9377 9378 // The map types are always constant so we don't need to generate code to 9379 // fill arrays. Instead, we create an array constant. 9380 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9381 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9382 std::string MaptypesName = 9383 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9384 auto *MapTypesArrayGbl = 9385 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9386 Info.MapTypesArray = MapTypesArrayGbl; 9387 9388 // The information types are only built if there is debug information 9389 // requested. 
9390 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9391 Info.MapNamesArray = llvm::Constant::getNullValue( 9392 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9393 } else { 9394 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9395 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9396 }; 9397 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9398 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9399 std::string MapnamesName = 9400 CGM.getOpenMPRuntime().getName({"offload_mapnames"}); 9401 auto *MapNamesArrayGbl = 9402 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); 9403 Info.MapNamesArray = MapNamesArrayGbl; 9404 } 9405 9406 // If there's a present map type modifier, it must not be applied to the end 9407 // of a region, so generate a separate map type array in that case. 9408 if (Info.separateBeginEndCalls()) { 9409 bool EndMapTypesDiffer = false; 9410 for (uint64_t &Type : Mapping) { 9411 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9412 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9413 EndMapTypesDiffer = true; 9414 } 9415 } 9416 if (EndMapTypesDiffer) { 9417 MapTypesArrayGbl = 9418 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); 9419 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9420 } 9421 } 9422 9423 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9424 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9425 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9426 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9427 Info.BasePointersArray, 0, I); 9428 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9429 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9430 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9431 CGF.Builder.CreateStore(BPVal, BPAddr); 9432 9433 if (Info.requiresDevicePointerInfo()) 9434 if (const ValueDecl *DevVD = 9435 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 
9436 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9437 9438 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9439 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9440 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9441 Info.PointersArray, 0, I); 9442 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9443 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9444 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9445 CGF.Builder.CreateStore(PVal, PAddr); 9446 9447 if (hasRuntimeEvaluationCaptureSize) { 9448 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9449 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9450 Info.SizesArray, 9451 /*Idx0=*/0, 9452 /*Idx1=*/I); 9453 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9454 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9455 CGM.Int64Ty, 9456 /*isSigned=*/true), 9457 SAddr); 9458 } 9459 9460 // Fill up the mapper array. 9461 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9462 if (CombinedInfo.Mappers[I]) { 9463 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9464 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9465 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9466 Info.HasMapper = true; 9467 } 9468 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9469 CGF.Builder.CreateStore(MFunc, MAddr); 9470 } 9471 } 9472 9473 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9474 Info.NumberOfPtrs == 0) 9475 return; 9476 9477 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9478 } 9479 9480 namespace { 9481 /// Additional arguments for emitOffloadingArraysArgument function. 
struct ArgumentsOptions {
  // When true, emit the map-types array variant for the region end call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All *Arg out-parameters are set either to a GEP into the corresponding
/// array emitted earlier (see emitOffloadingArrays) or to a null pointer
/// constant when there are no map entries.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // Use the end-of-region map types when they were emitted separately
    // (i.e. a 'present' modifier forced a second array).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No map entries at all: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
9551 static const OMPExecutableDirective * 9552 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9553 const auto *CS = D.getInnermostCapturedStmt(); 9554 const auto *Body = 9555 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9556 const Stmt *ChildStmt = 9557 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9558 9559 if (const auto *NestedDir = 9560 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9561 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9562 switch (D.getDirectiveKind()) { 9563 case OMPD_target: 9564 if (isOpenMPDistributeDirective(DKind)) 9565 return NestedDir; 9566 if (DKind == OMPD_teams) { 9567 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9568 /*IgnoreCaptured=*/true); 9569 if (!Body) 9570 return nullptr; 9571 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9572 if (const auto *NND = 9573 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9574 DKind = NND->getDirectiveKind(); 9575 if (isOpenMPDistributeDirective(DKind)) 9576 return NND; 9577 } 9578 } 9579 return nullptr; 9580 case OMPD_target_teams: 9581 if (isOpenMPDistributeDirective(DKind)) 9582 return NestedDir; 9583 return nullptr; 9584 case OMPD_target_parallel: 9585 case OMPD_target_simd: 9586 case OMPD_target_parallel_for: 9587 case OMPD_target_parallel_for_simd: 9588 return nullptr; 9589 case OMPD_target_teams_distribute: 9590 case OMPD_target_teams_distribute_simd: 9591 case OMPD_target_teams_distribute_parallel_for: 9592 case OMPD_target_teams_distribute_parallel_for_simd: 9593 case OMPD_parallel: 9594 case OMPD_for: 9595 case OMPD_parallel_for: 9596 case OMPD_parallel_master: 9597 case OMPD_parallel_sections: 9598 case OMPD_for_simd: 9599 case OMPD_parallel_for_simd: 9600 case OMPD_cancel: 9601 case OMPD_cancellation_point: 9602 case OMPD_ordered: 9603 case OMPD_threadprivate: 9604 case OMPD_allocate: 9605 case OMPD_task: 9606 case OMPD_simd: 9607 case OMPD_tile: 9608 
case OMPD_unroll: 9609 case OMPD_sections: 9610 case OMPD_section: 9611 case OMPD_single: 9612 case OMPD_master: 9613 case OMPD_critical: 9614 case OMPD_taskyield: 9615 case OMPD_barrier: 9616 case OMPD_taskwait: 9617 case OMPD_taskgroup: 9618 case OMPD_atomic: 9619 case OMPD_flush: 9620 case OMPD_depobj: 9621 case OMPD_scan: 9622 case OMPD_teams: 9623 case OMPD_target_data: 9624 case OMPD_target_exit_data: 9625 case OMPD_target_enter_data: 9626 case OMPD_distribute: 9627 case OMPD_distribute_simd: 9628 case OMPD_distribute_parallel_for: 9629 case OMPD_distribute_parallel_for_simd: 9630 case OMPD_teams_distribute: 9631 case OMPD_teams_distribute_simd: 9632 case OMPD_teams_distribute_parallel_for: 9633 case OMPD_teams_distribute_parallel_for_simd: 9634 case OMPD_target_update: 9635 case OMPD_declare_simd: 9636 case OMPD_declare_variant: 9637 case OMPD_begin_declare_variant: 9638 case OMPD_end_declare_variant: 9639 case OMPD_declare_target: 9640 case OMPD_end_declare_target: 9641 case OMPD_declare_reduction: 9642 case OMPD_declare_mapper: 9643 case OMPD_taskloop: 9644 case OMPD_taskloop_simd: 9645 case OMPD_master_taskloop: 9646 case OMPD_master_taskloop_simd: 9647 case OMPD_parallel_master_taskloop: 9648 case OMPD_parallel_master_taskloop_simd: 9649 case OMPD_requires: 9650 case OMPD_unknown: 9651 default: 9652 llvm_unreachable("Unexpected directive."); 9653 } 9654 } 9655 9656 return nullptr; 9657 } 9658 9659 /// Emit the user-defined mapper function. The code generation follows the 9660 /// pattern in the example below. 9661 /// \code 9662 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9663 /// void *base, void *begin, 9664 /// int64_t size, int64_t type, 9665 /// void *name = nullptr) { 9666 /// // Allocate space for an array section first or add a base/begin for 9667 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper at most once; UDMMap doubles as the lookup cache used by
  // getOrCreateUserDefinedMapperFunc().
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The loop below walks the array through a restrict-qualified pointer to Ty.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from the mangled mapped type and the mapper's
  // declared name, e.g. ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop 'optnone' so the mapper body can be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PtrPHI walks the array elements; its first incoming edge is set up here,
  // the latch edge is added after the loop body is emitted below.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position so it can be added onto each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested, mirroring
    // the map-names array handling elsewhere in this file.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // The ToElseBB -> EndBB edge carries the unmodified (tofrom) map type.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the latch edge of the element-pointer PHI.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Record the mapper against the current function so it can be found again
    // while that function is being emitted.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  // The init path runs only when the delete bit is clear; the delete path
  // only when it is set.
  if (IsInit) {
    // base != begin? (non-zero pointer difference)
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function emitted for \p D, emitting it first through
/// emitUserDefinedMapper() if it has not been generated yet.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// If \p D is (or nests) a teams-distribute loop, emit a call to
/// __kmpc_push_target_tripcount_mapper so the runtime receives the loop trip
/// count for \p DeviceID before the target region is launched.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // SizeEmitter may return null; in that case no tripcount call is emitted.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // The presence of 'depend' or 'nowait' clauses determines whether the
  // target invocation must go through an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
10072 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10073 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10074 &CapturedVars, 10075 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10076 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10077 // Reverse offloading is not supported, so just execute on the host. 10078 if (RequiresOuterTask) { 10079 CapturedVars.clear(); 10080 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10081 } 10082 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10083 return; 10084 } 10085 10086 // On top of the arrays that were filled up, the target offloading call 10087 // takes as arguments the device id as well as the host pointer. The host 10088 // pointer is used by the runtime library to identify the current target 10089 // region, so it only has to be unique and not necessarily point to 10090 // anything. It could be the pointer to the outlined function that 10091 // implements the target region, but we aren't using that so that the 10092 // compiler doesn't need to keep that, and could therefore inline the host 10093 // function if proven worthwhile during optimization. 10094 10095 // From this point on, we need to have an ID of the target region defined. 10096 assert(OutlinedFnID && "Invalid outlined function ID!"); 10097 10098 // Emit device ID if any. 10099 llvm::Value *DeviceID; 10100 if (Device.getPointer()) { 10101 assert((Device.getInt() == OMPC_DEVICE_unknown || 10102 Device.getInt() == OMPC_DEVICE_device_num) && 10103 "Expected device_num modifier."); 10104 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10105 DeviceID = 10106 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10107 } else { 10108 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10109 } 10110 10111 // Emit the number of elements in the offloading arrays. 
10112 llvm::Value *PointerNum = 10113 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10114 10115 // Return value of the runtime offloading call. 10116 llvm::Value *Return; 10117 10118 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10119 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10120 10121 // Source location for the ident struct 10122 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10123 10124 // Emit tripcount for the target loop-based directive. 10125 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10126 10127 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10128 // The target region is an outlined function launched by the runtime 10129 // via calls __tgt_target() or __tgt_target_teams(). 10130 // 10131 // __tgt_target() launches a target region with one team and one thread, 10132 // executing a serial region. This master thread may in turn launch 10133 // more threads within its team upon encountering a parallel region, 10134 // however, no additional teams can be launched on the device. 10135 // 10136 // __tgt_target_teams() launches a target region with one or more teams, 10137 // each with one or more threads. This call is required for target 10138 // constructs such as: 10139 // 'target teams' 10140 // 'target' / 'teams' 10141 // 'target teams distribute parallel for' 10142 // 'target parallel' 10143 // and so on. 10144 // 10145 // Note that on the host and CPU targets, the runtime implementation of 10146 // these calls simply call the outlined function without forking threads. 10147 // The outlined functions themselves have runtime calls to 10148 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10149 // the compiler in emitTeamsCall() and emitParallelCall(). 
10150 // 10151 // In contrast, on the NVPTX target, the implementation of 10152 // __tgt_target_teams() launches a GPU kernel with the requested number 10153 // of teams and threads so no additional calls to the runtime are required. 10154 if (NumTeams) { 10155 // If we have NumTeams defined this means that we have an enclosed teams 10156 // region. Therefore we also expect to have NumThreads defined. These two 10157 // values should be defined in the presence of a teams directive, 10158 // regardless of having any clauses associated. If the user is using teams 10159 // but no clauses, these two values will be the default that should be 10160 // passed to the runtime library - a 32-bit integer with the value zero. 10161 assert(NumThreads && "Thread limit expression should be available along " 10162 "with number of teams."); 10163 SmallVector<llvm::Value *> OffloadingArgs = { 10164 RTLoc, 10165 DeviceID, 10166 OutlinedFnID, 10167 PointerNum, 10168 InputInfo.BasePointersArray.getPointer(), 10169 InputInfo.PointersArray.getPointer(), 10170 InputInfo.SizesArray.getPointer(), 10171 MapTypesArray, 10172 MapNamesArray, 10173 InputInfo.MappersArray.getPointer(), 10174 NumTeams, 10175 NumThreads}; 10176 if (HasNowait) { 10177 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10178 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10179 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10180 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10181 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10182 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10183 } 10184 Return = CGF.EmitRuntimeCall( 10185 OMPBuilder.getOrCreateRuntimeFunction( 10186 CGM.getModule(), HasNowait 10187 ? 
OMPRTL___tgt_target_teams_nowait_mapper 10188 : OMPRTL___tgt_target_teams_mapper), 10189 OffloadingArgs); 10190 } else { 10191 SmallVector<llvm::Value *> OffloadingArgs = { 10192 RTLoc, 10193 DeviceID, 10194 OutlinedFnID, 10195 PointerNum, 10196 InputInfo.BasePointersArray.getPointer(), 10197 InputInfo.PointersArray.getPointer(), 10198 InputInfo.SizesArray.getPointer(), 10199 MapTypesArray, 10200 MapNamesArray, 10201 InputInfo.MappersArray.getPointer()}; 10202 if (HasNowait) { 10203 // Add int32_t depNum = 0, void *depList = nullptr, int32_t 10204 // noAliasDepNum = 0, void *noAliasDepList = nullptr. 10205 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10206 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10207 OffloadingArgs.push_back(CGF.Builder.getInt32(0)); 10208 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy)); 10209 } 10210 Return = CGF.EmitRuntimeCall( 10211 OMPBuilder.getOrCreateRuntimeFunction( 10212 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 10213 : OMPRTL___tgt_target_mapper), 10214 OffloadingArgs); 10215 } 10216 10217 // Check the error code and execute the host version if required. 10218 llvm::BasicBlock *OffloadFailedBlock = 10219 CGF.createBasicBlock("omp_offload.failed"); 10220 llvm::BasicBlock *OffloadContBlock = 10221 CGF.createBasicBlock("omp_offload.cont"); 10222 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10223 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10224 10225 CGF.EmitBlock(OffloadFailedBlock); 10226 if (RequiresOuterTask) { 10227 CapturedVars.clear(); 10228 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10229 } 10230 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10231 CGF.EmitBranch(OffloadContBlock); 10232 10233 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10234 }; 10235 10236 // Notify that the host version must be executed. 
10237 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10238 RequiresOuterTask](CodeGenFunction &CGF, 10239 PrePostActionTy &) { 10240 if (RequiresOuterTask) { 10241 CapturedVars.clear(); 10242 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10243 } 10244 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10245 }; 10246 10247 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10248 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10249 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10250 // Fill up the arrays with all the captured variables. 10251 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10252 10253 // Get mappable expression information. 10254 MappableExprsHandler MEHandler(D, CGF); 10255 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10256 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10257 10258 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10259 auto *CV = CapturedVars.begin(); 10260 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10261 CE = CS.capture_end(); 10262 CI != CE; ++CI, ++RI, ++CV) { 10263 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10264 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10265 10266 // VLA sizes are passed to the outlined region by copy and do not have map 10267 // information associated. 10268 if (CI->capturesVariableArrayType()) { 10269 CurInfo.Exprs.push_back(nullptr); 10270 CurInfo.BasePointers.push_back(*CV); 10271 CurInfo.Pointers.push_back(*CV); 10272 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10273 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10274 // Copy to the device as an argument. No need to retrieve it. 
10275 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10276 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10277 MappableExprsHandler::OMP_MAP_IMPLICIT); 10278 CurInfo.Mappers.push_back(nullptr); 10279 } else { 10280 // If we have any information in the map clause, we use it, otherwise we 10281 // just do a default mapping. 10282 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10283 if (!CI->capturesThis()) 10284 MappedVarSet.insert(CI->getCapturedVar()); 10285 else 10286 MappedVarSet.insert(nullptr); 10287 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10288 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10289 // Generate correct mapping for variables captured by reference in 10290 // lambdas. 10291 if (CI->capturesVariable()) 10292 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10293 CurInfo, LambdaPointers); 10294 } 10295 // We expect to have at least an element of information for this capture. 10296 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10297 "Non-existing map pointer for capture!"); 10298 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10299 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10300 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10301 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10302 "Inconsistent map information sizes!"); 10303 10304 // If there is an entry in PartialStruct it means we have a struct with 10305 // individual members mapped. Emit an extra combined entry. 10306 if (PartialStruct.Base.isValid()) { 10307 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10308 MEHandler.emitCombinedEntry( 10309 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10310 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10311 } 10312 10313 // We need to append the results of this capture to what we already have. 
10314 CombinedInfo.append(CurInfo); 10315 } 10316 // Adjust MEMBER_OF flags for the lambdas captures. 10317 MEHandler.adjustMemberOfForLambdaCaptures( 10318 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10319 CombinedInfo.Types); 10320 // Map any list items in a map clause that were not captures because they 10321 // weren't referenced within the construct. 10322 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10323 10324 TargetDataInfo Info; 10325 // Fill up the arrays and create the arguments. 10326 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10327 emitOffloadingArraysArgument( 10328 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10329 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10330 {/*ForEndTask=*/false}); 10331 10332 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10333 InputInfo.BasePointersArray = 10334 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10335 InputInfo.PointersArray = 10336 Address(Info.PointersArray, CGM.getPointerAlign()); 10337 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10338 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10339 MapTypesArray = Info.MapTypesArray; 10340 MapNamesArray = Info.MapNamesArray; 10341 if (RequiresOuterTask) 10342 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10343 else 10344 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10345 }; 10346 10347 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10348 CodeGenFunction &CGF, PrePostActionTy &) { 10349 if (RequiresOuterTask) { 10350 CodeGenFunction::OMPTargetDataInfo InputInfo; 10351 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10352 } else { 10353 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10354 } 10355 }; 10356 10357 // If we have a target function ID it means that we need to support 10358 // offloading, otherwise, just execute on the host. 
  // We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does
  // not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      // Offload under the 'if' condition; host fallback otherwise.
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    // No target function ID => offloading is unsupported here; always run
    // the host version.
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan statement \p S for OpenMP target directives and emit the
/// corresponding device functions. \p ParentName is the mangled name of the
/// enclosing host function, used to build unique target-region entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region within the compilation unit.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target entry point;
    // RequiresDeviceCodegen guarantees they cannot reach this switch.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: scan its associated statement, if any.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if \p VD carries a declare-target device_type clause that
/// excludes it from the current (host or device) compilation.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // Link clauses (and 'to' under unified shared memory) are handled later
    // in emitDeferredTargetDecls.
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and we are the host.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size is unknown on this side.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        // Keep the reference alive so the optimizer cannot drop it.
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host register the address of the indirection variable.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Route functions (and declare-reduction decls) and variables to their
  // dedicated handlers.
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emit the declare-target variables that were put aside by
  // emitTargetGlobalVariable.
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Default implementation does nothing; derived runtimes may override.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  // Record the requires clauses that affect later codegen decisions.
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if
               (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Translate the atomic_default_mem_order clause into the LLVM atomic
      // ordering used as the default for atomic codegen.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Unknown kind: keep the current default ordering.
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
    // (Intentional fallthrough into the predefined-allocator cases below.)
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // Save and clear ShouldMarkAsGlobal for the scope of this RAII object
  // (device compilation only); the destructor restores it.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Already emitted as a declare-target definition only if a non-declaration
      // llvm::Function with this mangled name exists in the module.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the decl; returns true only if it was already marked before.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A null clause expression means "not specified"; pass 0 so the runtime
  // chooses the default.
  llvm::Value *NumTeamsVal =
      NumTeams
          ?
            CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
11075 llvm::Value *DeviceID = nullptr; 11076 if (Device) { 11077 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11078 CGF.Int64Ty, /*isSigned=*/true); 11079 } else { 11080 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11081 } 11082 11083 // Emit the number of elements in the offloading arrays. 11084 llvm::Constant *PointerNum = 11085 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11086 11087 // Source location for the ident struct 11088 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11089 11090 llvm::Value *OffloadingArgs[] = {RTLoc, 11091 DeviceID, 11092 PointerNum, 11093 InputInfo.BasePointersArray.getPointer(), 11094 InputInfo.PointersArray.getPointer(), 11095 InputInfo.SizesArray.getPointer(), 11096 MapTypesArray, 11097 MapNamesArray, 11098 InputInfo.MappersArray.getPointer()}; 11099 11100 // Select the right runtime function call for each standalone 11101 // directive. 11102 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11103 RuntimeFunction RTLFn; 11104 switch (D.getDirectiveKind()) { 11105 case OMPD_target_enter_data: 11106 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11107 : OMPRTL___tgt_target_data_begin_mapper; 11108 break; 11109 case OMPD_target_exit_data: 11110 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11111 : OMPRTL___tgt_target_data_end_mapper; 11112 break; 11113 case OMPD_target_update: 11114 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11115 : OMPRTL___tgt_target_data_update_mapper; 11116 break; 11117 case OMPD_parallel: 11118 case OMPD_for: 11119 case OMPD_parallel_for: 11120 case OMPD_parallel_master: 11121 case OMPD_parallel_sections: 11122 case OMPD_for_simd: 11123 case OMPD_parallel_for_simd: 11124 case OMPD_cancel: 11125 case OMPD_cancellation_point: 11126 case OMPD_ordered: 11127 case OMPD_threadprivate: 11128 case OMPD_allocate: 11129 case OMPD_task: 11130 case OMPD_simd: 11131 case OMPD_tile: 11132 case OMPD_unroll: 11133 case OMPD_sections: 11134 case OMPD_section: 11135 case OMPD_single: 11136 case OMPD_master: 11137 case OMPD_critical: 11138 case OMPD_taskyield: 11139 case OMPD_barrier: 11140 case OMPD_taskwait: 11141 case OMPD_taskgroup: 11142 case OMPD_atomic: 11143 case OMPD_flush: 11144 case OMPD_depobj: 11145 case OMPD_scan: 11146 case OMPD_teams: 11147 case OMPD_target_data: 11148 case OMPD_distribute: 11149 case OMPD_distribute_simd: 11150 case OMPD_distribute_parallel_for: 11151 case OMPD_distribute_parallel_for_simd: 11152 case OMPD_teams_distribute: 11153 case OMPD_teams_distribute_simd: 11154 case OMPD_teams_distribute_parallel_for: 11155 case OMPD_teams_distribute_parallel_for_simd: 11156 case OMPD_declare_simd: 11157 case OMPD_declare_variant: 11158 case OMPD_begin_declare_variant: 11159 case OMPD_end_declare_variant: 11160 case OMPD_declare_target: 11161 case OMPD_end_declare_target: 11162 case OMPD_declare_reduction: 11163 case OMPD_declare_mapper: 11164 case OMPD_taskloop: 11165 case OMPD_taskloop_simd: 11166 case OMPD_master_taskloop: 11167 case OMPD_master_taskloop_simd: 11168 case OMPD_parallel_master_taskloop: 11169 case OMPD_parallel_master_taskloop_simd: 11170 case OMPD_target: 11171 case OMPD_target_simd: 11172 case OMPD_target_teams_distribute: 11173 case OMPD_target_teams_distribute_simd: 11174 case OMPD_target_teams_distribute_parallel_for: 11175 case OMPD_target_teams_distribute_parallel_for_simd: 11176 case 
OMPD_target_teams: 11177 case OMPD_target_parallel: 11178 case OMPD_target_parallel_for: 11179 case OMPD_target_parallel_for_simd: 11180 case OMPD_requires: 11181 case OMPD_unknown: 11182 default: 11183 llvm_unreachable("Unexpected standalone target data directive."); 11184 break; 11185 } 11186 CGF.EmitRuntimeCall( 11187 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11188 OffloadingArgs); 11189 }; 11190 11191 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11192 &MapNamesArray](CodeGenFunction &CGF, 11193 PrePostActionTy &) { 11194 // Fill up the arrays with all the mapped variables. 11195 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11196 11197 // Get map clause information. 11198 MappableExprsHandler MEHandler(D, CGF); 11199 MEHandler.generateAllInfo(CombinedInfo); 11200 11201 TargetDataInfo Info; 11202 // Fill up the arrays and create the arguments. 11203 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11204 /*IsNonContiguous=*/true); 11205 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11206 D.hasClausesOfKind<OMPNowaitClause>(); 11207 emitOffloadingArraysArgument( 11208 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11209 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11210 {/*ForEndTask=*/false}); 11211 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11212 InputInfo.BasePointersArray = 11213 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11214 InputInfo.PointersArray = 11215 Address(Info.PointersArray, CGM.getPointerAlign()); 11216 InputInfo.SizesArray = 11217 Address(Info.SizesArray, CGM.getPointerAlign()); 11218 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11219 MapTypesArray = Info.MapTypesArray; 11220 MapNamesArray = Info.MapNamesArray; 11221 if (RequiresOuterTask) 11222 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11223 else 11224 emitInlinedDirective(CGF, D.getDirectiveKind(), 
ThenGen); 11225 }; 11226 11227 if (IfCond) { 11228 emitIfClause(CGF, IfCond, TargetThenGen, 11229 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11230 } else { 11231 RegionCodeGenTy ThenRCG(TargetThenGen); 11232 ThenRCG(CGF); 11233 } 11234 } 11235 11236 namespace { 11237 /// Kind of parameter in a function with 'declare simd' directive. 11238 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11239 /// Attribute set of the parameter. 11240 struct ParamAttrTy { 11241 ParamKindTy Kind = Vector; 11242 llvm::APSInt StrideOrArg; 11243 llvm::APSInt Alignment; 11244 }; 11245 } // namespace 11246 11247 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11248 ArrayRef<ParamAttrTy> ParamAttrs) { 11249 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11250 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11251 // of that clause. The VLEN value must be power of 2. 11252 // In other case the notion of the function`s "characteristic data type" (CDT) 11253 // is used to compute the vector length. 11254 // CDT is defined in the following order: 11255 // a) For non-void function, the CDT is the return type. 11256 // b) If the function has any non-uniform, non-linear parameters, then the 11257 // CDT is the type of the first such parameter. 11258 // c) If the CDT determined by a) or b) above is struct, union, or class 11259 // type which is pass-by-value (except for the type that maps to the 11260 // built-in complex data type), the characteristic data type is int. 11261 // d) If none of the above three cases is applicable, the CDT is int. 11262 // The VLEN is then determined based on the CDT and the size of vector 11263 // register of that ISA for which current vector version is generated. 
The 11264 // VLEN is computed using the formula below: 11265 // VLEN = sizeof(vector_register) / sizeof(CDT), 11266 // where vector register size specified in section 3.2.1 Registers and the 11267 // Stack Frame of original AMD64 ABI document. 11268 QualType RetType = FD->getReturnType(); 11269 if (RetType.isNull()) 11270 return 0; 11271 ASTContext &C = FD->getASTContext(); 11272 QualType CDT; 11273 if (!RetType.isNull() && !RetType->isVoidType()) { 11274 CDT = RetType; 11275 } else { 11276 unsigned Offset = 0; 11277 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11278 if (ParamAttrs[Offset].Kind == Vector) 11279 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11280 ++Offset; 11281 } 11282 if (CDT.isNull()) { 11283 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11284 if (ParamAttrs[I + Offset].Kind == Vector) { 11285 CDT = FD->getParamDecl(I)->getType(); 11286 break; 11287 } 11288 } 11289 } 11290 } 11291 if (CDT.isNull()) 11292 CDT = C.IntTy; 11293 CDT = CDT->getCanonicalTypeUnqualified(); 11294 if (CDT->isRecordType() || CDT->isUnionType()) 11295 CDT = C.IntTy; 11296 return C.getTypeSize(CDT); 11297 } 11298 11299 static void 11300 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11301 const llvm::APSInt &VLENVal, 11302 ArrayRef<ParamAttrTy> ParamAttrs, 11303 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11304 struct ISADataTy { 11305 char ISA; 11306 unsigned VecRegSize; 11307 }; 11308 ISADataTy ISAData[] = { 11309 { 11310 'b', 128 11311 }, // SSE 11312 { 11313 'c', 256 11314 }, // AVX 11315 { 11316 'd', 256 11317 }, // AVX2 11318 { 11319 'e', 512 11320 }, // AVX512 11321 }; 11322 llvm::SmallVector<char, 2> Masked; 11323 switch (State) { 11324 case OMPDeclareSimdDeclAttr::BS_Undefined: 11325 Masked.push_back('N'); 11326 Masked.push_back('M'); 11327 break; 11328 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11329 Masked.push_back('N'); 11330 break; 11331 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11332 
Masked.push_back('M'); 11333 break; 11334 } 11335 for (char Mask : Masked) { 11336 for (const ISADataTy &Data : ISAData) { 11337 SmallString<256> Buffer; 11338 llvm::raw_svector_ostream Out(Buffer); 11339 Out << "_ZGV" << Data.ISA << Mask; 11340 if (!VLENVal) { 11341 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11342 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11343 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11344 } else { 11345 Out << VLENVal; 11346 } 11347 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11348 switch (ParamAttr.Kind){ 11349 case LinearWithVarStride: 11350 Out << 's' << ParamAttr.StrideOrArg; 11351 break; 11352 case Linear: 11353 Out << 'l'; 11354 if (ParamAttr.StrideOrArg != 1) 11355 Out << ParamAttr.StrideOrArg; 11356 break; 11357 case Uniform: 11358 Out << 'u'; 11359 break; 11360 case Vector: 11361 Out << 'v'; 11362 break; 11363 } 11364 if (!!ParamAttr.Alignment) 11365 Out << 'a' << ParamAttr.Alignment; 11366 } 11367 Out << '_' << Fn->getName(); 11368 Fn->addFnAttr(Out.str()); 11369 } 11370 } 11371 } 11372 11373 // This are the Functions that are needed to mangle the name of the 11374 // vector functions generated by the compiler, according to the rules 11375 // defined in the "Vector Function ABI specifications for AArch64", 11376 // available at 11377 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11378 11379 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11380 /// 11381 /// TODO: Need to implement the behavior for reference marked with a 11382 /// var or no linear modifiers (1.b in the section). For this, we 11383 /// need to extend ParamKindTy to support the linear modifiers. 
11384 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11385 QT = QT.getCanonicalType(); 11386 11387 if (QT->isVoidType()) 11388 return false; 11389 11390 if (Kind == ParamKindTy::Uniform) 11391 return false; 11392 11393 if (Kind == ParamKindTy::Linear) 11394 return false; 11395 11396 // TODO: Handle linear references with modifiers 11397 11398 if (Kind == ParamKindTy::LinearWithVarStride) 11399 return false; 11400 11401 return true; 11402 } 11403 11404 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11405 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11406 QT = QT.getCanonicalType(); 11407 unsigned Size = C.getTypeSize(QT); 11408 11409 // Only scalars and complex within 16 bytes wide set PVB to true. 11410 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11411 return false; 11412 11413 if (QT->isFloatingType()) 11414 return true; 11415 11416 if (QT->isIntegerType()) 11417 return true; 11418 11419 if (QT->isPointerType()) 11420 return true; 11421 11422 // TODO: Add support for complex types (section 3.1.2, item 2). 11423 11424 return false; 11425 } 11426 11427 /// Computes the lane size (LS) of a return type or of an input parameter, 11428 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11429 /// TODO: Add support for references, section 3.2.1, item 1. 11430 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11431 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11432 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11433 if (getAArch64PBV(PTy, C)) 11434 return C.getTypeSize(PTy); 11435 } 11436 if (getAArch64PBV(QT, C)) 11437 return C.getTypeSize(QT); 11438 11439 return C.getTypeSize(C.getUIntPtrType()); 11440 } 11441 11442 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11443 // signature of the scalar function, as defined in 3.2.2 of the 11444 // AAVFABI. 
11445 static std::tuple<unsigned, unsigned, bool> 11446 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11447 QualType RetType = FD->getReturnType().getCanonicalType(); 11448 11449 ASTContext &C = FD->getASTContext(); 11450 11451 bool OutputBecomesInput = false; 11452 11453 llvm::SmallVector<unsigned, 8> Sizes; 11454 if (!RetType->isVoidType()) { 11455 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11456 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11457 OutputBecomesInput = true; 11458 } 11459 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11460 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11461 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11462 } 11463 11464 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11465 // The LS of a function parameter / return value can only be a power 11466 // of 2, starting from 8 bits, up to 128. 11467 assert(std::all_of(Sizes.begin(), Sizes.end(), 11468 [](unsigned Size) { 11469 return Size == 8 || Size == 16 || Size == 32 || 11470 Size == 64 || Size == 128; 11471 }) && 11472 "Invalid size"); 11473 11474 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11475 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11476 OutputBecomesInput); 11477 } 11478 11479 /// Mangle the parameter part of the vector function name according to 11480 /// their OpenMP classification. The mangling function is defined in 11481 /// section 3.5 of the AAVFABI. 11482 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11483 SmallString<256> Buffer; 11484 llvm::raw_svector_ostream Out(Buffer); 11485 for (const auto &ParamAttr : ParamAttrs) { 11486 switch (ParamAttr.Kind) { 11487 case LinearWithVarStride: 11488 Out << "ls" << ParamAttr.StrideOrArg; 11489 break; 11490 case Linear: 11491 Out << 'l'; 11492 // Don't print the step value if it is not present or if it is 11493 // equal to 1. 
11494 if (ParamAttr.StrideOrArg != 1) 11495 Out << ParamAttr.StrideOrArg; 11496 break; 11497 case Uniform: 11498 Out << 'u'; 11499 break; 11500 case Vector: 11501 Out << 'v'; 11502 break; 11503 } 11504 11505 if (!!ParamAttr.Alignment) 11506 Out << 'a' << ParamAttr.Alignment; 11507 } 11508 11509 return std::string(Out.str()); 11510 } 11511 11512 // Function used to add the attribute. The parameter `VLEN` is 11513 // templated to allow the use of "x" when targeting scalable functions 11514 // for SVE. 11515 template <typename T> 11516 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11517 char ISA, StringRef ParSeq, 11518 StringRef MangledName, bool OutputBecomesInput, 11519 llvm::Function *Fn) { 11520 SmallString<256> Buffer; 11521 llvm::raw_svector_ostream Out(Buffer); 11522 Out << Prefix << ISA << LMask << VLEN; 11523 if (OutputBecomesInput) 11524 Out << "v"; 11525 Out << ParSeq << "_" << MangledName; 11526 Fn->addFnAttr(Out.str()); 11527 } 11528 11529 // Helper function to generate the Advanced SIMD names depending on 11530 // the value of the NDS when simdlen is not present. 
11531 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11532 StringRef Prefix, char ISA, 11533 StringRef ParSeq, StringRef MangledName, 11534 bool OutputBecomesInput, 11535 llvm::Function *Fn) { 11536 switch (NDS) { 11537 case 8: 11538 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11539 OutputBecomesInput, Fn); 11540 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11541 OutputBecomesInput, Fn); 11542 break; 11543 case 16: 11544 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11545 OutputBecomesInput, Fn); 11546 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11547 OutputBecomesInput, Fn); 11548 break; 11549 case 32: 11550 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11551 OutputBecomesInput, Fn); 11552 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11553 OutputBecomesInput, Fn); 11554 break; 11555 case 64: 11556 case 128: 11557 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11558 OutputBecomesInput, Fn); 11559 break; 11560 default: 11561 llvm_unreachable("Scalar type is too wide."); 11562 } 11563 } 11564 11565 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11566 static void emitAArch64DeclareSimdFunction( 11567 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11568 ArrayRef<ParamAttrTy> ParamAttrs, 11569 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11570 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11571 11572 // Get basic data for building the vector signature. 11573 const auto Data = getNDSWDS(FD, ParamAttrs); 11574 const unsigned NDS = std::get<0>(Data); 11575 const unsigned WDS = std::get<1>(Data); 11576 const bool OutputBecomesInput = std::get<2>(Data); 11577 11578 // Check the values provided via `simdlen` by the user. 11579 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11580 if (UserVLEN == 1) { 11581 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11582 DiagnosticsEngine::Warning, 11583 "The clause simdlen(1) has no effect when targeting aarch64."); 11584 CGM.getDiags().Report(SLoc, DiagID); 11585 return; 11586 } 11587 11588 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11589 // Advanced SIMD output. 11590 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11591 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11592 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11593 "power of 2 when targeting Advanced SIMD."); 11594 CGM.getDiags().Report(SLoc, DiagID); 11595 return; 11596 } 11597 11598 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11599 // limits. 11600 if (ISA == 's' && UserVLEN != 0) { 11601 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11602 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11603 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11604 "lanes in the architectural constraints " 11605 "for SVE (min is 128-bit, max is " 11606 "2048-bit, by steps of 128-bit)"); 11607 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11608 return; 11609 } 11610 } 11611 11612 // Sort out parameter sequence. 11613 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11614 StringRef Prefix = "_ZGV"; 11615 // Generate simdlen from user input (if any). 11616 if (UserVLEN) { 11617 if (ISA == 's') { 11618 // SVE generates only a masked function. 11619 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11620 OutputBecomesInput, Fn); 11621 } else { 11622 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11623 // Advanced SIMD generates one or two functions, depending on 11624 // the `[not]inbranch` clause. 
11625 switch (State) { 11626 case OMPDeclareSimdDeclAttr::BS_Undefined: 11627 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11628 OutputBecomesInput, Fn); 11629 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11630 OutputBecomesInput, Fn); 11631 break; 11632 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11633 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11634 OutputBecomesInput, Fn); 11635 break; 11636 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11637 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11638 OutputBecomesInput, Fn); 11639 break; 11640 } 11641 } 11642 } else { 11643 // If no user simdlen is provided, follow the AAVFABI rules for 11644 // generating the vector length. 11645 if (ISA == 's') { 11646 // SVE, section 3.4.1, item 1. 11647 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11648 OutputBecomesInput, Fn); 11649 } else { 11650 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11651 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11652 // two vector names depending on the use of the clause 11653 // `[not]inbranch`. 
11654 switch (State) { 11655 case OMPDeclareSimdDeclAttr::BS_Undefined: 11656 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11657 OutputBecomesInput, Fn); 11658 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11659 OutputBecomesInput, Fn); 11660 break; 11661 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11662 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11663 OutputBecomesInput, Fn); 11664 break; 11665 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11666 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11667 OutputBecomesInput, Fn); 11668 break; 11669 } 11670 } 11671 } 11672 } 11673 11674 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11675 llvm::Function *Fn) { 11676 ASTContext &C = CGM.getContext(); 11677 FD = FD->getMostRecentDecl(); 11678 // Map params to their positions in function decl. 11679 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11680 if (isa<CXXMethodDecl>(FD)) 11681 ParamPositions.try_emplace(FD, 0); 11682 unsigned ParamPos = ParamPositions.size(); 11683 for (const ParmVarDecl *P : FD->parameters()) { 11684 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11685 ++ParamPos; 11686 } 11687 while (FD) { 11688 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11689 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11690 // Mark uniform parameters. 11691 for (const Expr *E : Attr->uniforms()) { 11692 E = E->IgnoreParenImpCasts(); 11693 unsigned Pos; 11694 if (isa<CXXThisExpr>(E)) { 11695 Pos = ParamPositions[FD]; 11696 } else { 11697 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11698 ->getCanonicalDecl(); 11699 Pos = ParamPositions[PVD]; 11700 } 11701 ParamAttrs[Pos].Kind = Uniform; 11702 } 11703 // Get alignment info. 
11704 auto NI = Attr->alignments_begin(); 11705 for (const Expr *E : Attr->aligneds()) { 11706 E = E->IgnoreParenImpCasts(); 11707 unsigned Pos; 11708 QualType ParmTy; 11709 if (isa<CXXThisExpr>(E)) { 11710 Pos = ParamPositions[FD]; 11711 ParmTy = E->getType(); 11712 } else { 11713 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11714 ->getCanonicalDecl(); 11715 Pos = ParamPositions[PVD]; 11716 ParmTy = PVD->getType(); 11717 } 11718 ParamAttrs[Pos].Alignment = 11719 (*NI) 11720 ? (*NI)->EvaluateKnownConstInt(C) 11721 : llvm::APSInt::getUnsigned( 11722 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11723 .getQuantity()); 11724 ++NI; 11725 } 11726 // Mark linear parameters. 11727 auto SI = Attr->steps_begin(); 11728 auto MI = Attr->modifiers_begin(); 11729 for (const Expr *E : Attr->linears()) { 11730 E = E->IgnoreParenImpCasts(); 11731 unsigned Pos; 11732 // Rescaling factor needed to compute the linear parameter 11733 // value in the mangled name. 11734 unsigned PtrRescalingFactor = 1; 11735 if (isa<CXXThisExpr>(E)) { 11736 Pos = ParamPositions[FD]; 11737 } else { 11738 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11739 ->getCanonicalDecl(); 11740 Pos = ParamPositions[PVD]; 11741 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11742 PtrRescalingFactor = CGM.getContext() 11743 .getTypeSizeInChars(P->getPointeeType()) 11744 .getQuantity(); 11745 } 11746 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11747 ParamAttr.Kind = Linear; 11748 // Assuming a stride of 1, for `linear` without modifiers. 
11749 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11750 if (*SI) { 11751 Expr::EvalResult Result; 11752 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11753 if (const auto *DRE = 11754 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11755 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11756 ParamAttr.Kind = LinearWithVarStride; 11757 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11758 ParamPositions[StridePVD->getCanonicalDecl()]); 11759 } 11760 } 11761 } else { 11762 ParamAttr.StrideOrArg = Result.Val.getInt(); 11763 } 11764 } 11765 // If we are using a linear clause on a pointer, we need to 11766 // rescale the value of linear_step with the byte size of the 11767 // pointee type. 11768 if (Linear == ParamAttr.Kind) 11769 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11770 ++SI; 11771 ++MI; 11772 } 11773 llvm::APSInt VLENVal; 11774 SourceLocation ExprLoc; 11775 const Expr *VLENExpr = Attr->getSimdlen(); 11776 if (VLENExpr) { 11777 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11778 ExprLoc = VLENExpr->getExprLoc(); 11779 } 11780 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11781 if (CGM.getTriple().isX86()) { 11782 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11783 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11784 unsigned VLEN = VLENVal.getExtValue(); 11785 StringRef MangledName = Fn->getName(); 11786 if (CGM.getTarget().hasFeature("sve")) 11787 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11788 MangledName, 's', 128, Fn, ExprLoc); 11789 if (CGM.getTarget().hasFeature("neon")) 11790 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11791 MangledName, 'n', 128, Fn, ExprLoc); 11792 } 11793 } 11794 FD = FD->getPreviousDecl(); 11795 } 11796 } 11797 11798 namespace { 11799 /// Cleanup action for doacross support. 
/// EH-scope cleanup that emits the __kmpc_doacross_fini runtime call when the
/// doacross loop region is exited (normally or via exception).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // Pre-built call arguments (ident_t *loc, kmp_int32 gtid), captured by
  // value because the cleanup outlives the enclosing emission scope.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit initialization of doacross loop dependencies: build an array of
/// kmp_dim descriptors (one per collapsed loop dimension) and call
/// __kmpc_doacross_init. A cleanup for the matching __kmpc_doacross_fini
/// call is pushed onto the EH stack.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is built lazily once and cached on the runtime object.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-init the whole array; 'lo' fields stay 0, only 'up' and 'st' are
  // written below.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  // Defer __kmpc_doacross_fini to region exit via the cleanup stack.
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit an 'ordered depend(source)' or 'ordered depend(sink)' construct:
/// materialize the loop counter values of clause \p C into a kmp_int64 array
/// and call __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee with an artificial debug location, using the
/// cheaper nounwind call emission when the callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to an outlined OpenMP region function; delegates to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a 'declare target' function body has been emitted; used later
/// to diagnose/validate device code emission.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Default implementation: the native parameter is used directly, so its
/// local address is returned (device runtimes may override this).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

// NOTE: getAddressOfLocalVariable continues past this chunk; the head below
// is unchanged.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Allocate through the runtime: void *addr = __kmpc_alloc(tid, size, al).
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Calls __kmpc_free(tid, addr, allocator) on scope exit (normal and EH).
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD was registered as a local of an untied task body
/// emitted into the current function.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII: while a loop directive with 'nontemporal' clauses is being emitted,
/// keep the set of nontemporal decls on the runtime's stack.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
ME->getMemberDecl(); 12092 } 12093 DS.insert(VD); 12094 } 12095 } 12096 } 12097 12098 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12099 if (!NeedToPush) 12100 return; 12101 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12102 } 12103 12104 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12105 CodeGenFunction &CGF, 12106 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12107 std::pair<Address, Address>> &LocalVars) 12108 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12109 if (!NeedToPush) 12110 return; 12111 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12112 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12113 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12114 } 12115 12116 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12117 if (!NeedToPush) 12118 return; 12119 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12120 } 12121 12122 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12123 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12124 12125 return llvm::any_of( 12126 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12127 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12128 } 12129 12130 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12131 const OMPExecutableDirective &S, 12132 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12133 const { 12134 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12135 // Vars in target/task regions must be excluded completely. 
12136 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12137 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12138 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12139 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12140 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12141 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12142 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12143 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12144 } 12145 } 12146 // Exclude vars in private clauses. 12147 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12148 for (const Expr *Ref : C->varlists()) { 12149 if (!Ref->getType()->isScalarType()) 12150 continue; 12151 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12152 if (!DRE) 12153 continue; 12154 NeedToCheckForLPCs.insert(DRE->getDecl()); 12155 } 12156 } 12157 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12158 for (const Expr *Ref : C->varlists()) { 12159 if (!Ref->getType()->isScalarType()) 12160 continue; 12161 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12162 if (!DRE) 12163 continue; 12164 NeedToCheckForLPCs.insert(DRE->getDecl()); 12165 } 12166 } 12167 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12168 for (const Expr *Ref : C->varlists()) { 12169 if (!Ref->getType()->isScalarType()) 12170 continue; 12171 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12172 if (!DRE) 12173 continue; 12174 NeedToCheckForLPCs.insert(DRE->getDecl()); 12175 } 12176 } 12177 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12178 for (const Expr *Ref : C->varlists()) { 12179 if (!Ref->getType()->isScalarType()) 12180 continue; 12181 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12182 if (!DRE) 12183 continue; 12184 NeedToCheckForLPCs.insert(DRE->getDecl()); 12185 } 12186 } 12187 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12188 for (const Expr *Ref : C->varlists()) { 12189 if (!Ref->getType()->isScalarType()) 12190 continue; 12191 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12192 if (!DRE) 12193 continue; 12194 NeedToCheckForLPCs.insert(DRE->getDecl()); 12195 } 12196 } 12197 for (const Decl *VD : NeedToCheckForLPCs) { 12198 for (const LastprivateConditionalData &Data : 12199 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12200 if (Data.DeclToUniqueName.count(VD) > 0) { 12201 if (!Data.Disabled) 12202 NeedToAddForLPCsAsDisabled.insert(VD); 12203 break; 12204 } 12205 } 12206 } 12207 } 12208 12209 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12210 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12211 : CGM(CGF.CGM), 12212 Action((CGM.getLangOpts().OpenMP >= 50 && 12213 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12214 [](const OMPLastprivateClause *C) { 12215 return C->getKind() == 12216 OMPC_LASTPRIVATE_conditional; 12217 })) 12218 ? 
ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Register every lastprivate(conditional:) variable with a unique global
  // name used later for its "last value" storage.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Disabling constructor: pushes a "Disabled" entry for the decls of \p S
/// whose inner lastprivate-conditional analysis must be turned off.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of entry the constructor pushed.
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Create (or reuse) the per-variable control record for a lastprivate
/// conditional variable and return the address of its value field. The record
/// pairs the private copy with a char "Fired" flag tracking writes.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build struct { value; char Fired; }.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
BaseLVal = std::get<3>(VI->getSecond());
  }
  // Clear the Fired flag on entry; assignments in the region will set it.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  // Match a direct reference to a registered lastprivate conditional var.
  // The innermost (topmost-on-stack) registration wins; a Disabled entry
  // suppresses the match entirely.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Match a member of the current class ('this->member' or implicit 'this').
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Recurse into glvalue children only - we are looking for a written lvalue.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emit the "if this iteration is the latest so far, remember the value"
/// update for a lastprivate conditional variable, guarded by a critical
/// region (unless in simd-only mode).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
12462 (void)ApplyDebugLocation::CreateEmpty(CGF); 12463 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12464 }; 12465 12466 if (CGM.getLangOpts().OpenMPSimd) { 12467 // Do not emit as a critical region as no parallel region could be emitted. 12468 RegionCodeGenTy ThenRCG(CodeGen); 12469 ThenRCG(CGF); 12470 } else { 12471 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12472 } 12473 } 12474 12475 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12476 const Expr *LHS) { 12477 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12478 return; 12479 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12480 if (!Checker.Visit(LHS)) 12481 return; 12482 const Expr *FoundE; 12483 const Decl *FoundD; 12484 StringRef UniqueDeclName; 12485 LValue IVLVal; 12486 llvm::Function *FoundFn; 12487 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12488 Checker.getFoundData(); 12489 if (FoundFn != CGF.CurFn) { 12490 // Special codegen for inner parallel regions. 
12491 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12492 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12493 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12494 "Lastprivate conditional is not found in outer region."); 12495 QualType StructTy = std::get<0>(It->getSecond()); 12496 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12497 LValue PrivLVal = CGF.EmitLValue(FoundE); 12498 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12499 PrivLVal.getAddress(CGF), 12500 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12501 LValue BaseLVal = 12502 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12503 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12504 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12505 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12506 FiredLVal, llvm::AtomicOrdering::Unordered, 12507 /*IsVolatile=*/true, /*isInit=*/false); 12508 return; 12509 } 12510 12511 // Private address of the lastprivate conditional in the current context. 
12512 // priv_a 12513 LValue LVal = CGF.EmitLValue(FoundE); 12514 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12515 FoundE->getExprLoc()); 12516 } 12517 12518 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12519 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12520 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12521 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12522 return; 12523 auto Range = llvm::reverse(LastprivateConditionalStack); 12524 auto It = llvm::find_if( 12525 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12526 if (It == Range.end() || It->Fn != CGF.CurFn) 12527 return; 12528 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12529 assert(LPCI != LastprivateConditionalToTypes.end() && 12530 "Lastprivates must be registered already."); 12531 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12532 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12533 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12534 for (const auto &Pair : It->DeclToUniqueName) { 12535 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12536 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12537 continue; 12538 auto I = LPCI->getSecond().find(Pair.first); 12539 assert(I != LPCI->getSecond().end() && 12540 "Lastprivate must be rehistered already."); 12541 // bool Cmp = priv_a.Fired != 0; 12542 LValue BaseLVal = std::get<3>(I->getSecond()); 12543 LValue FiredLVal = 12544 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12545 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12546 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12547 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12548 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12549 // if (Cmp) { 12550 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12551 CGF.EmitBlock(ThenBB); 
12552 Address Addr = CGF.GetAddrOfLocalVar(VD); 12553 LValue LVal; 12554 if (VD->getType()->isReferenceType()) 12555 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12556 AlignmentSource::Decl); 12557 else 12558 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12559 AlignmentSource::Decl); 12560 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12561 D.getBeginLoc()); 12562 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12563 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12564 // } 12565 } 12566 } 12567 12568 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12569 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12570 SourceLocation Loc) { 12571 if (CGF.getLangOpts().OpenMP < 50) 12572 return; 12573 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12574 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12575 "Unknown lastprivate conditional variable."); 12576 StringRef UniqueName = It->second; 12577 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12578 // The variable was not updated in the region - exit. 
12579 if (!GV) 12580 return; 12581 LValue LPLVal = CGF.MakeAddrLValue( 12582 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12583 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12584 CGF.EmitStoreOfScalar(Res, PrivLVal); 12585 } 12586 12587 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12588 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12589 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12590 llvm_unreachable("Not supported in SIMD-only mode"); 12591 } 12592 12593 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12594 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12595 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12596 llvm_unreachable("Not supported in SIMD-only mode"); 12597 } 12598 12599 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12600 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12601 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12602 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12603 bool Tied, unsigned &NumberOfParts) { 12604 llvm_unreachable("Not supported in SIMD-only mode"); 12605 } 12606 12607 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12608 SourceLocation Loc, 12609 llvm::Function *OutlinedFn, 12610 ArrayRef<llvm::Value *> CapturedVars, 12611 const Expr *IfCond) { 12612 llvm_unreachable("Not supported in SIMD-only mode"); 12613 } 12614 12615 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12616 CodeGenFunction &CGF, StringRef CriticalName, 12617 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12618 const Expr *Hint) { 12619 llvm_unreachable("Not supported in SIMD-only mode"); 12620 } 12621 12622 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12623 const RegionCodeGenTy &MasterOpGen, 12624 SourceLocation Loc) { 12625 llvm_unreachable("Not supported in SIMD-only mode"); 12626 } 12627 12628 void 
CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12629 const RegionCodeGenTy &MasterOpGen, 12630 SourceLocation Loc, 12631 const Expr *Filter) { 12632 llvm_unreachable("Not supported in SIMD-only mode"); 12633 } 12634 12635 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12636 SourceLocation Loc) { 12637 llvm_unreachable("Not supported in SIMD-only mode"); 12638 } 12639 12640 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12641 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12642 SourceLocation Loc) { 12643 llvm_unreachable("Not supported in SIMD-only mode"); 12644 } 12645 12646 void CGOpenMPSIMDRuntime::emitSingleRegion( 12647 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12648 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12649 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12650 ArrayRef<const Expr *> AssignmentOps) { 12651 llvm_unreachable("Not supported in SIMD-only mode"); 12652 } 12653 12654 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12655 const RegionCodeGenTy &OrderedOpGen, 12656 SourceLocation Loc, 12657 bool IsThreads) { 12658 llvm_unreachable("Not supported in SIMD-only mode"); 12659 } 12660 12661 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12662 SourceLocation Loc, 12663 OpenMPDirectiveKind Kind, 12664 bool EmitChecks, 12665 bool ForceSimpleCall) { 12666 llvm_unreachable("Not supported in SIMD-only mode"); 12667 } 12668 12669 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12670 CodeGenFunction &CGF, SourceLocation Loc, 12671 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12672 bool Ordered, const DispatchRTInput &DispatchValues) { 12673 llvm_unreachable("Not supported in SIMD-only mode"); 12674 } 12675 12676 void CGOpenMPSIMDRuntime::emitForStaticInit( 12677 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12678 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 
12679 llvm_unreachable("Not supported in SIMD-only mode"); 12680 } 12681 12682 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12683 CodeGenFunction &CGF, SourceLocation Loc, 12684 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12685 llvm_unreachable("Not supported in SIMD-only mode"); 12686 } 12687 12688 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12689 SourceLocation Loc, 12690 unsigned IVSize, 12691 bool IVSigned) { 12692 llvm_unreachable("Not supported in SIMD-only mode"); 12693 } 12694 12695 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12696 SourceLocation Loc, 12697 OpenMPDirectiveKind DKind) { 12698 llvm_unreachable("Not supported in SIMD-only mode"); 12699 } 12700 12701 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12702 SourceLocation Loc, 12703 unsigned IVSize, bool IVSigned, 12704 Address IL, Address LB, 12705 Address UB, Address ST) { 12706 llvm_unreachable("Not supported in SIMD-only mode"); 12707 } 12708 12709 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12710 llvm::Value *NumThreads, 12711 SourceLocation Loc) { 12712 llvm_unreachable("Not supported in SIMD-only mode"); 12713 } 12714 12715 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12716 ProcBindKind ProcBind, 12717 SourceLocation Loc) { 12718 llvm_unreachable("Not supported in SIMD-only mode"); 12719 } 12720 12721 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12722 const VarDecl *VD, 12723 Address VDAddr, 12724 SourceLocation Loc) { 12725 llvm_unreachable("Not supported in SIMD-only mode"); 12726 } 12727 12728 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12729 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12730 CodeGenFunction *CGF) { 12731 llvm_unreachable("Not supported in SIMD-only mode"); 12732 } 12733 12734 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 
12735 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12736 llvm_unreachable("Not supported in SIMD-only mode"); 12737 } 12738 12739 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12740 ArrayRef<const Expr *> Vars, 12741 SourceLocation Loc, 12742 llvm::AtomicOrdering AO) { 12743 llvm_unreachable("Not supported in SIMD-only mode"); 12744 } 12745 12746 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12747 const OMPExecutableDirective &D, 12748 llvm::Function *TaskFunction, 12749 QualType SharedsTy, Address Shareds, 12750 const Expr *IfCond, 12751 const OMPTaskDataTy &Data) { 12752 llvm_unreachable("Not supported in SIMD-only mode"); 12753 } 12754 12755 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12756 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12757 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12758 const Expr *IfCond, const OMPTaskDataTy &Data) { 12759 llvm_unreachable("Not supported in SIMD-only mode"); 12760 } 12761 12762 void CGOpenMPSIMDRuntime::emitReduction( 12763 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12764 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12765 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12766 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12767 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12768 ReductionOps, Options); 12769 } 12770 12771 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12772 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12773 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12774 llvm_unreachable("Not supported in SIMD-only mode"); 12775 } 12776 12777 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12778 SourceLocation Loc, 12779 bool IsWorksharingReduction) { 12780 llvm_unreachable("Not supported in SIMD-only mode"); 12781 } 

// Task-reduction fixups for reduction item N.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Address of a task-reduction item; needs the runtime's reductions object.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'taskwait' directive.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'cancellation point' directive.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'cancel' directive.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Outlining of a 'target' region body; offloading is runtime-only.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Launch of a 'target' region (device selection, tripcount SizeEmitter).
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Device-side emission of target functions.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Device-side emission of target global variables.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Unlike the aborting stubs around it, this is reachable: returning false
// signals that this runtime never claims a global for target-related
// emission, so the caller falls back to normal codegen.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// 'teams' construct launch.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'num_teams'/'thread_limit' clauses for 'teams'.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'target data' region begin/end mapping calls.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Standalone data-mapping directives (enter/exit data, update).
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'ordered' loop doacross dependence initialization.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'ordered depend(source/sink)' cross-iteration synchronization.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Mapping of captured-field parameters to target-side parameters.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Address of a translated (target-side) parameter.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}