//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Derived classes specialize behavior per region kind (parallel/task/
/// inlined/target); this base carries the shared state: the codegen
/// callback, the directive kind, and whether the region may be cancelled.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor used when the region has an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor used for regions without a captured statement (e.g.
  /// inlined regions that delegate to an outer region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Default is a no-op; overridden by the task region (and forwarded by
  /// inlined regions) to emit re-entry points for untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-stmt info tagged CR_OpenMP is (a subclass
  /// of) CGOpenMPRegionInfo; subclasses further discriminate on RegionKind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
/// Used for the outlined function of a standalone 'parallel' directive.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the "part switching" scaffolding for untied
  /// tasks: an untied task body is split into parts, each resumable via a
  /// switch on the part id stored through PartIDVar.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the current part id and switch on
        // it. The switch default falls through to the done block, which
        // returns from the task entry; case 0 resumes at the first part.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// End the current task part: store the next part id, run the
    /// user-supplied untied codegen (e.g. re-enqueue the task), branch out
    /// through the return block, and register the resume block as a new
    /// case of the part switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // getNumCases() is used as the next sequential part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) region info,
/// if any, since an inlined region has no captures of its own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same pointer as OldCSI if it is an OpenMP region info, else null.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder codegen callback: CGOpenMPInnerExprInfo never emits a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only non-local (e.g.
      // global) variables need privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Not captured in any enclosing region; use the original variable.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda-capture and block state of the
  /// enclosing function is stashed away for the lifetime of this RAII so the
  /// inlined region does not see it; restored in the destructor.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same bit as OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file, the function
///                            and a pair of line numbers that delimit the
///                            construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Runs the action's Exit hook when the cleanup scope unwinds (on both
  /// normal and exceptional paths; see push site below).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push the Exit action as a cleanup so it fires even if the region body
    // exits via branch or exception.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initialization of a private reduction item from a user-defined
/// reduction (declare reduction) initializer, or zero-initialize it when the
/// UDR has no initializer expression.
/// \param InitOp The initializer call expression from the UDR.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's omp_priv/omp_orig placeholders to the private
    // and original addresses, then emit the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: copy from a private null-constant global.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // SrcAddr is only needed when a declare-reduction initializer reads the
    // original (omp_orig) array.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue for the shared (lower-bound) part of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of an array-section reduction item; returns an
/// empty LValue for non-section expressions.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one exists, or when the
  // private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  // All four arrays are parallel: entry N of each describes reduction item N.
  // OrigAddresses/SharedAddresses are filled lazily by emitSharedOrigLValue.
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // First = lower bound lvalue, Second = upper bound (array sections only).
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    // The original item differs from the shared one; emit it separately.
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size in chars is known, element count not needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; size in chars = count * sizeof(elem).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to the caller-provided element count.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item N requires a destructor
/// call when the reduction scope ends.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind =
PrivateType.isDestructedType(); 929 if (needCleanups(N)) { 930 PrivateAddr = CGF.Builder.CreateElementBitCast( 931 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 932 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 933 } 934 } 935 936 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 937 LValue BaseLV) { 938 BaseTy = BaseTy.getNonReferenceType(); 939 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 940 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 941 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 942 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 943 } else { 944 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 945 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 946 } 947 BaseTy = BaseTy->getPointeeType(); 948 } 949 return CGF.MakeAddrLValue( 950 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 951 CGF.ConvertTypeForMem(ElTy)), 952 BaseLV.getType(), BaseLV.getBaseInfo(), 953 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 954 } 955 956 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 957 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 958 llvm::Value *Addr) { 959 Address Tmp = Address::invalid(); 960 Address TopTmp = Address::invalid(); 961 Address MostTopTmp = Address::invalid(); 962 BaseTy = BaseTy.getNonReferenceType(); 963 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 964 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 965 Tmp = CGF.CreateMemTemp(BaseTy); 966 if (TopTmp.isValid()) 967 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 968 else 969 MostTopTmp = Tmp; 970 TopTmp = Tmp; 971 BaseTy = BaseTy->getPointeeType(); 972 } 973 llvm::Type *Ty = BaseLVType; 974 if (Tmp.isValid()) 975 Ty = Tmp.getElementType(); 976 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 977 if (Tmp.isValid()) { 978 CGF.Builder.CreateStore(Addr, Tmp); 979 return MostTopTmp; 980 } 981 return 
Address(Addr, BaseLVAlignment); 982 } 983 984 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 985 const VarDecl *OrigVD = nullptr; 986 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 987 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 988 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 989 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 990 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 991 Base = TempASE->getBase()->IgnoreParenImpCasts(); 992 DE = cast<DeclRefExpr>(Base); 993 OrigVD = cast<VarDecl>(DE->getDecl()); 994 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 995 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 996 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 997 Base = TempASE->getBase()->IgnoreParenImpCasts(); 998 DE = cast<DeclRefExpr>(Base); 999 OrigVD = cast<VarDecl>(DE->getDecl()); 1000 } 1001 return OrigVD; 1002 } 1003 1004 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1005 Address PrivateAddr) { 1006 const DeclRefExpr *DE; 1007 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1008 BaseDecls.emplace_back(OrigVD); 1009 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1010 LValue BaseLValue = 1011 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1012 OriginalBaseLValue); 1013 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1014 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1015 llvm::Value *PrivatePointer = 1016 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1017 PrivateAddr.getPointer(), 1018 SharedAddresses[N].first.getAddress(CGF).getType()); 1019 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1020 return castToBase(CGF, OrigVD->getType(), 1021 SharedAddresses[N].first.getType(), 1022 OriginalBaseLValue.getAddress(CGF).getType(), 1023 OriginalBaseLValue.getAlignment(), Ptr); 1024 } 
1025 BaseDecls.emplace_back( 1026 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1027 return PrivateAddr; 1028 } 1029 1030 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1031 const OMPDeclareReductionDecl *DRD = 1032 getReductionInit(ClausesData[N].ReductionOp); 1033 return DRD && DRD->getInitializer(); 1034 } 1035 1036 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1037 return CGF.EmitLoadOfPointerLValue( 1038 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1039 getThreadIDVariable()->getType()->castAs<PointerType>()); 1040 } 1041 1042 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1043 if (!CGF.HaveInsertPoint()) 1044 return; 1045 // 1.2.2 OpenMP Language Terminology 1046 // Structured block - An executable statement with a single entry at the 1047 // top and a single exit at the bottom. 1048 // The point of exit cannot be a branch out of the structured block. 1049 // longjmp() and throw() must not violate the entry/exit criteria. 
1050 CGF.EHStack.pushTerminate(); 1051 if (S) 1052 CGF.incrementProfileCounter(S); 1053 CodeGen(CGF); 1054 CGF.EHStack.popTerminate(); 1055 } 1056 1057 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1058 CodeGenFunction &CGF) { 1059 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1060 getThreadIDVariable()->getType(), 1061 AlignmentSource::Decl); 1062 } 1063 1064 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1065 QualType FieldTy) { 1066 auto *Field = FieldDecl::Create( 1067 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1068 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1069 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1070 Field->setAccess(AS_public); 1071 DC->addDecl(Field); 1072 return Field; 1073 } 1074 1075 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1076 StringRef Separator) 1077 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1078 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1079 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1080 1081 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1082 OMPBuilder.initialize(); 1083 loadOffloadInfoMetadata(); 1084 } 1085 1086 void CGOpenMPRuntime::clear() { 1087 InternalVars.clear(); 1088 // Clean non-target variable declarations possibly used only in debug info. 
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only remove globals that are still unused declarations.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime symbol name: the first part is prefixed by
/// FirstSeparator, all subsequent parts by Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the combiner (or initializer, when \p IsCombiner is false) function
/// for a user-defined reduction: an internal-linkage
/// 'void fn(Ty *restrict out, Ty *restrict in)' whose body maps the omp_in /
/// omp_out (or omp_orig / omp_priv) variables onto the two parameters and
/// then evaluates \p CombinerInitializer.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without an explicit init expression, default-initialize
  // omp_priv (the Out variable) from its own initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once) the combiner and optional initializer functions for the
/// user-defined reduction \p D and cache them in UDRMap. When \p CGF is
/// given, the declaration is also recorded per-function so the cache entry
/// can be dropped in functionFinished().
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the cached {combiner, initializer} pair for \p D, emitting the
/// functions on first use.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
1224 // The FiniCB will still be needed but at the point where the 1225 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1226 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1227 assert(IP.getBlock()->end() == IP.getPoint() && 1228 "Clang CG should cause non-terminated block!"); 1229 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1230 CGF.Builder.restoreIP(IP); 1231 CodeGenFunction::JumpDest Dest = 1232 CGF.getOMPCancelDestination(OMPD_parallel); 1233 CGF.EmitBranchThroughCleanup(Dest); 1234 }; 1235 1236 // TODO: Remove this once we emit parallel regions through the 1237 // OpenMPIRBuilder as it can do this setup internally. 1238 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1239 OMPBuilder->pushFinalizationCB(std::move(FI)); 1240 } 1241 ~PushAndPopStackRAII() { 1242 if (OMPBuilder) 1243 OMPBuilder->popFinalizationCB(); 1244 } 1245 llvm::OpenMPIRBuilder *OMPBuilder; 1246 }; 1247 } // namespace 1248 1249 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1250 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1251 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1252 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1253 assert(ThreadIDVar->getType()->isPointerType() && 1254 "thread id variable must be of type kmp_int32 *"); 1255 CodeGenFunction CGF(CGM, true); 1256 bool HasCancel = false; 1257 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1258 HasCancel = OPD->hasCancel(); 1259 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1260 HasCancel = OPD->hasCancel(); 1261 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1262 HasCancel = OPSD->hasCancel(); 1263 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1264 HasCancel = OPFD->hasCancel(); 1265 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1266 HasCancel = OPFD->hasCancel(); 1267 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1268 HasCancel = OPFD->hasCancel(); 1269 else if (const auto *OPFD = 1270 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1271 HasCancel = OPFD->hasCancel(); 1272 else if (const auto *OPFD = 1273 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1274 HasCancel = OPFD->hasCancel(); 1275 1276 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1277 // parallel region to make cancellation barriers work properly. 1278 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1279 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1280 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1281 HasCancel, OutlinedHelperName); 1282 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1283 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1284 } 1285 1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1287 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1288 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1289 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1290 return emitParallelOrTeamsOutlinedFunction( 1291 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1292 } 1293 1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1295 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1296 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1297 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1298 return emitParallelOrTeamsOutlinedFunction( 1299 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1300 } 1301 1302 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1303 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1304 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1305 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1306 bool Tied, unsigned &NumberOfParts) { 1307 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1308 PrePostActionTy &) { 1309 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1310 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1311 llvm::Value *TaskArgs[] = { 1312 UpLoc, ThreadID, 1313 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1314 TaskTVar->getType()->castAs<PointerType>()) 1315 .getPointer(CGF)}; 1316 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1317 CGM.getModule(), OMPRTL___kmpc_omp_task), 1318 TaskArgs); 1319 }; 1320 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1321 UntiedCodeGen); 1322 CodeGen.setAction(Action); 1323 assert(!ThreadIDVar->getType()->isPointerType() && 1324 "thread id variable must be of type kmp_int32 for tasks"); 1325 const OpenMPDirectiveKind Region = 1326 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1327 : OMPD_task; 1328 const CapturedStmt *CS = D.getCapturedStmt(Region); 1329 bool HasCancel = false; 1330 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1331 HasCancel = TD->hasCancel(); 1332 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1333 HasCancel = TD->hasCancel(); 1334 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1335 HasCancel = TD->hasCancel(); 1336 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1337 HasCancel = TD->hasCancel(); 1338 1339 CodeGenFunction CGF(CGM, true); 1340 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1341 InnermostKind, HasCancel, Action); 1342 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1343 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1344 if (!Tied) 1345 NumberOfParts = Action.getNumberOfParts(); 1346 return Res; 1347 } 1348 1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1350 const RecordDecl *RD, 
const CGRecordLayout &RL, 1351 ArrayRef<llvm::Constant *> Data) { 1352 llvm::StructType *StructTy = RL.getLLVMType(); 1353 unsigned PrevIdx = 0; 1354 ConstantInitBuilder CIBuilder(CGM); 1355 auto DI = Data.begin(); 1356 for (const FieldDecl *FD : RD->fields()) { 1357 unsigned Idx = RL.getLLVMFieldNo(FD); 1358 // Fill the alignment. 1359 for (unsigned I = PrevIdx; I < Idx; ++I) 1360 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1361 PrevIdx = Idx + 1; 1362 Fields.add(*DI); 1363 ++DI; 1364 } 1365 } 1366 1367 template <class... As> 1368 static llvm::GlobalVariable * 1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1370 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1371 As &&... Args) { 1372 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1373 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1374 ConstantInitBuilder CIBuilder(CGM); 1375 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1376 buildStructValue(Fields, CGM, RD, RL, Data); 1377 return Fields.finishAndCreateGlobal( 1378 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1379 std::forward<As>(Args)...); 1380 } 1381 1382 template <typename T> 1383 static void 1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1385 ArrayRef<llvm::Constant *> Data, 1386 T &Parent) { 1387 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1388 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1389 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1390 buildStructValue(Fields, CGM, RD, RL, Data); 1391 Fields.finishAndAddTo(Parent); 1392 } 1393 1394 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1395 bool AtCurrentPoint) { 1396 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1397 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1398 1399 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove (and erase from the IR) the per-function service insertion point,
/// if one was installed.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Build the ";file;function;line;column;;" ident string for \p Loc into
/// \p Buffer and return a view of it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Emit (or reuse) an ident_t* describing \p Loc with the given \p Flags.
/// Falls back to the default source-location string when debug info is off
/// or the location is invalid.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Return the current OpenMP global thread id for \p CGF, preferring (in
/// order): the OpenMPIRBuilder, a per-function cached value, the region's
/// thread-id parameter, and finally a call to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
1483 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1484 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1485 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1486 !CGF.getLangOpts().CXXExceptions || 1487 CGF.Builder.GetInsertBlock() == TopBlock || 1488 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1489 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1490 TopBlock || 1491 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1492 CGF.Builder.GetInsertBlock()) { 1493 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1494 // If value loaded in entry block, cache it and use it everywhere in 1495 // function. 1496 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 Elem.second.ThreadID = ThreadID; 1499 } 1500 return ThreadID; 1501 } 1502 } 1503 } 1504 1505 // This is not an outlined function region - need to call __kmpc_int32 1506 // kmpc_global_thread_num(ident_t *loc). 1507 // Generate thread id value and cache this value for use across the 1508 // function. 
1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1510 if (!Elem.second.ServiceInsertPt) 1511 setLocThreadIdInsertPt(CGF); 1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1514 llvm::CallInst *Call = CGF.Builder.CreateCall( 1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1516 OMPRTL___kmpc_global_thread_num), 1517 emitUpdateLocation(CGF, Loc)); 1518 Call->setCallingConv(CGF.getRuntimeCC()); 1519 Elem.second.ThreadID = Call; 1520 return Call; 1521 } 1522 1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1524 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1526 clearLocThreadIdInsertPt(CGF); 1527 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1528 } 1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1530 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1531 UDRMap.erase(D); 1532 FunctionUDRMap.erase(CGF.CurFn); 1533 } 1534 auto I = FunctionUDMMap.find(CGF.CurFn); 1535 if (I != FunctionUDMMap.end()) { 1536 for(const auto *D : I->second) 1537 UDMMap.erase(D); 1538 FunctionUDMMap.erase(I); 1539 } 1540 LastprivateConditionalToTypes.erase(CGF.CurFn); 1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1542 } 1543 1544 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1545 return OMPBuilder.IdentPtr; 1546 } 1547 1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1549 if (!Kmpc_MicroTy) { 1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1552 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1554 } 1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1556 } 1557 1558 llvm::FunctionCallee 1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1560 assert((IVSize == 32 || IVSize == 64) && 1561 "IV size is not compatible with the omp runtime"); 1562 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1563 : "__kmpc_for_static_init_4u") 1564 : (IVSigned ? "__kmpc_for_static_init_8" 1565 : "__kmpc_for_static_init_8u"); 1566 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1567 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1568 llvm::Type *TypeParams[] = { 1569 getIdentTyPointerTy(), // loc 1570 CGM.Int32Ty, // tid 1571 CGM.Int32Ty, // schedtype 1572 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1573 PtrTy, // p_lower 1574 PtrTy, // p_upper 1575 PtrTy, // p_stride 1576 ITy, // incr 1577 ITy // chunk 1578 }; 1579 auto *FnTy = 1580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1581 return CGM.CreateRuntimeFunction(FnTy, Name); 1582 } 1583 1584 llvm::FunctionCallee 1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1586 assert((IVSize == 32 || IVSize == 64) && 1587 "IV size is not compatible with the omp runtime"); 1588 StringRef Name = 1589 IVSize == 32 1590 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1591 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1592 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1593 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1594 CGM.Int32Ty, // tid 1595 CGM.Int32Ty, // schedtype 1596 ITy, // lower 1597 ITy, // upper 1598 ITy, // stride 1599 ITy // chunk 1600 }; 1601 auto *FnTy = 1602 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1603 return CGM.CreateRuntimeFunction(FnTy, Name); 1604 } 1605 1606 llvm::FunctionCallee 1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1608 assert((IVSize == 32 || IVSize == 64) && 1609 "IV size is not compatible with the omp runtime"); 1610 StringRef Name = 1611 IVSize == 32 1612 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1613 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1614 llvm::Type *TypeParams[] = { 1615 getIdentTyPointerTy(), // loc 1616 CGM.Int32Ty, // tid 1617 }; 1618 auto *FnTy = 1619 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1620 return CGM.CreateRuntimeFunction(FnTy, Name); 1621 } 1622 1623 llvm::FunctionCallee 1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1625 assert((IVSize == 32 || IVSize == 64) && 1626 "IV size is not compatible with the omp runtime"); 1627 StringRef Name = 1628 IVSize == 32 1629 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1630 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1631 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1632 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1633 llvm::Type *TypeParams[] = { 1634 getIdentTyPointerTy(), // loc 1635 CGM.Int32Ty, // tid 1636 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1637 PtrTy, // p_lower 1638 PtrTy, // p_upper 1639 PtrTy // p_stride 1640 }; 1641 auto *FnTy = 1642 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1643 return CGM.CreateRuntimeFunction(FnTy, Name); 1644 } 1645 1646 /// Obtain information that uniquely identifies a target entry. This 1647 /// consists of the file and device IDs as well as line number associated with 1648 /// the relevant entry source location. 1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1650 unsigned &DeviceID, unsigned &FileID, 1651 unsigned &LineNum) { 1652 SourceManager &SM = C.getSourceManager(); 1653 1654 // The loc should be always valid and have a file ID (the user cannot use 1655 // #pragma directives in macros) 1656 1657 assert(Loc.isValid() && "Source location is expected to be always valid."); 1658 1659 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1660 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1661 1662 llvm::sys::fs::UniqueID ID; 1663 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1664 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1666 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1667 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1668 << PLoc.getFilename() << EC.message(); 1669 } 1670 1671 DeviceID = ID.getDevice(); 1672 FileID = ID.getFile(); 1673 LineNum = PLoc.getLine(); 1674 } 1675 1676 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1677 if (CGM.getLangOpts().OpenMPSimd) 1678 return Address::invalid(); 1679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1680 
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // An indirection ("ref ptr") global is only needed for 'link' variables, or
  // for 'to' variables when unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Name is "<mangled-var>[_<fileid>]_decl_tgt_ref_ptr"; the file id
    // disambiguates internal-linkage variables across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the real variable's
      // address; on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal "<mangled-name><cache-suffix>" global consumed by
/// __kmpc_threadprivate_cached for the given threadprivate variable.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Only meaningful when native TLS is not used for threadprivates.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the current thread's copy of a threadprivate
/// variable via __kmpc_threadprivate_cached, unless native TLS already makes
/// the variable per-thread.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the variable itself is already thread-local.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit the runtime registration of a threadprivate variable's
/// constructor/copy-constructor/destructor triple.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Synthesize ctor/dtor helper functions for a threadprivate variable and
/// register them with the runtime. Returns a standalone global-init function
/// when no CodeGenFunction is supplied (registration happens at startup),
/// nullptr otherwise (registration emitted into \p CGF, or nothing needed).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS requires no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the raw storage as void* and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the incoming pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects typed null pointers for the helpers we did not emit.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: emit a standalone global initializer that performs
      // the registration at program startup and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when not compiling for (or as) an offloading target.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' variables under unified shared memory) are
  // accessed through the reference pointer; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each definition only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Artificial debug location for the synthesized body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host a dummy private global serves as the unique entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host a dummy private global serves as the unique entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a named, compiler-generated ("artificial")
/// threadprivate value, creating the backing global (and per-thread cache)
/// on first use.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS the global itself can simply be made thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // __kmpc_threadprivate_cached(loc, tid, &global, size, &cache)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else construct for an OpenMP 'if' clause, constant-folding the
/// condition when possible so only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit a call to the outlined 'parallel' region: either a real fork via
/// __kmpc_fork_call, or a serialized execution on the current thread when the
/// 'if' clause evaluates to false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Serialized path: run the outlined region on the current thread.
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2148 SourceLocation Loc) { 2149 if (auto *OMPRegionInfo = 2150 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 if (OMPRegionInfo->getThreadIDVariable()) 2152 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2153 2154 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2155 QualType Int32Ty = 2156 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2157 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2158 CGF.EmitStoreOfScalar(ThreadID, 2159 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2160 2161 return ThreadIDTemp; 2162 } 2163 2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2165 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2166 SmallString<256> Buffer; 2167 llvm::raw_svector_ostream Out(Buffer); 2168 Out << Name; 2169 StringRef RuntimeName = Out.str(); 2170 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2171 if (Elem.second) { 2172 assert(Elem.second->getType()->getPointerElementType() == Ty && 2173 "OMP internal variable has different type than requested"); 2174 return &*Elem.second; 2175 } 2176 2177 return Elem.second = new llvm::GlobalVariable( 2178 CGM.getModule(), Ty, /*IsConstant*/ false, 2179 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2180 Elem.first(), /*InsertBefore=*/nullptr, 2181 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2182 } 2183 2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2185 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2186 std::string Name = getName({Prefix, "var"}); 2187 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2188 } 2189 2190 namespace { 2191 /// Common pre(post)-action for different OpenMP constructs. 
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;  // runtime entry called before the region
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;   // runtime entry called after the region
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                  // guard the body on Enter's result?
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only execute the region body when the entry call returned nonzero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional region opened by Enter(); callers of a Conditional
  // action must invoke this after emitting the region.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is only passed to the entry call.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ?
                                   CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Use the OpenMPIRBuilder path when it is enabled.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the generic pointer to the variable's own memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit "void copy_func(void *LHSArg, void *RHSArg)" that copies each
/// copyprivate variable from the source thread's pointer array to the
/// destination thread's, using the provided assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination slot I: the private copy to be overwritten.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source slot I: the value produced by the thread that executed 'single'.
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit code for the OpenMP 'single' construct, including the optional
/// copyprivate broadcast of privatized values to the other threads via
/// __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays run in parallel: entry I of each describes the
  // same copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // 'did_it' records whether this thread was the one that executed the
    // single region; only that thread supplies the source values.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body only runs when __kmpc_single returns
  // nonzero; __kmpc_end_single closes the region on that thread.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill each slot with the (void*-cast) address of a copyprivate variable.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit code for the OpenMP 'ordered' construct. When IsThreads is set the
/// region body is bracketed by __kmpc_ordered/__kmpc_end_ordered; otherwise
/// the body is emitted inline with no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag passed in the location
/// argument of barrier runtime calls.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule (and chunk expression) for a loop directive;
/// only overrides the outputs for doacross loops.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    // Synthesize an unsigned 32-bit integer literal '1' as the chunk expr.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for the given directive kind. Uses the OMPIRBuilder when
/// enabled; otherwise emits __kmpc_cancel_barrier (with an optional
/// cancellation-exit check) inside cancellable regions, or a plain
/// __kmpc_barrier elsewhere.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true iff the schedule clause maps to static without a chunk.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true iff the dist_schedule clause maps to static without a chunk.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true iff the schedule clause maps to static with a chunk.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true iff the dist_schedule clause maps to static with a chunk.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true iff the (unchunked, unordered) schedule is anything other
/// than plain static, i.e. requires the dynamic dispatch runtime entry points.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine the runtime schedule value with the monotonic/nonmonotonic
/// modifier bits derived from the schedule clause modifiers M1/M2.
/// The 'simd' modifier upgrades static_chunked to static_balanced_chunked.
/// \returns Schedule | Modifier as passed to the dispatch/static-init calls.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the static-init path instead unless ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the __kmpc_for_static_init_* call used by both
/// worksharing-loop and distribute codegen. Only static (possibly chunked)
/// schedules are valid here; dynamic schedules go through emitForDispatchInit.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init runtime call for a worksharing loop or sections
/// directive, tagging the ident_t location with the work kind.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init runtime call for a 'distribute' directive; the
/// distribute schedule never carries monotonic/nonmonotonic modifiers.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the closing __kmpc_for_static_fini call for a statically scheduled
/// loop/sections/distribute region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

/// Notify the runtime that one ordered iteration of a dynamically scheduled
/// loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Request the next chunk of a dynamically scheduled loop.
/// \returns an i1 value: the __kmpc_dispatch_next result converted to bool
/// (nonzero means a chunk was handed out in LB/UB/ST).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit a __kmpc_push_num_threads call for a num_threads clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit a __kmpc_push_proc_bind call for a proc_bind clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a flush; the expression list and atomic ordering parameters are
/// currently unused by this lowering.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// True when no target region or device global variable entries have been
/// recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Insert a placeholder entry (address/ID filled in at registration time).
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register (device) or create-and-register (host) a target region entry
/// keyed by (DeviceID, FileID, ParentName, LineNum).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    // Fill in the address/ID/flags of the pre-initialized device entry.
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host path: skip duplicates of already-registered target regions.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Check whether a target region entry exists for the given key. Unless
/// IgnoreAddressId is set, an entry whose address or ID is already filled in
/// is reported as absent (it counts as "already registered").
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  // Walk the device -> file -> parent-name -> line nesting of the map.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Pre-create a device global variable entry (device compilation only);
/// address/size/linkage are filled in at registration time.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register (device) or create-and-register (host) a declare-target global
/// variable entry keyed by its mangled name.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address; only fill in a missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      // Only fill in a missing size/linkage on a re-registration.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emit one __tgt_offload_entry descriptor (ID, name, size, flags) into the
/// 'omp_offloading_entries' section where the offload linker scans for it.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of the __tgt_offload_entry struct: addr, name, size, flags,
  // reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter
  // callbacks below.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file unique IDs against the source manager's files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second pass: validate every collected entry and emit the actual
  // __tgt_offload_entry descriptors.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3317 3318 if (!CGM.getLangOpts().OpenMPIsDevice) 3319 return; 3320 3321 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3322 return; 3323 3324 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3325 if (auto EC = Buf.getError()) { 3326 CGM.getDiags().Report(diag::err_cannot_open_file) 3327 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3328 return; 3329 } 3330 3331 llvm::LLVMContext C; 3332 auto ME = expectedToErrorOrAndEmitErrors( 3333 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3334 3335 if (auto EC = ME.getError()) { 3336 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3337 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3338 CGM.getDiags().Report(DiagID) 3339 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3340 return; 3341 } 3342 3343 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3344 if (!MD) 3345 return; 3346 3347 for (llvm::MDNode *MN : MD->operands()) { 3348 auto &&GetMDInt = [MN](unsigned Idx) { 3349 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3350 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3351 }; 3352 3353 auto &&GetMDString = [MN](unsigned Idx) { 3354 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3355 return V->getString(); 3356 }; 3357 3358 switch (GetMDInt(0)) { 3359 default: 3360 llvm_unreachable("Unexpected metadata!"); 3361 break; 3362 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3363 OffloadingEntryInfoTargetRegion: 3364 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3365 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3366 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3367 /*Order=*/GetMDInt(5)); 3368 break; 3369 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3370 OffloadingEntryInfoDeviceGlobalVar: 3371 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3372 /*MangledName=*/GetMDString(1), 3373 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3374 /*Flags=*/GetMDInt(2)), 3375 /*Order=*/GetMDInt(3)); 3376 break; 3377 } 3378 } 3379 } 3380 3381 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3382 if (!KmpRoutineEntryPtrTy) { 3383 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3384 ASTContext &C = CGM.getContext(); 3385 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3386 FunctionProtoType::ExtProtoInfo EPI; 3387 KmpRoutineEntryPtrQTy = C.getPointerType( 3388 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3389 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3390 } 3391 } 3392 3393 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3394 // Make sure the type of the entry is already created. This is the type we 3395 // have to create: 3396 // struct __tgt_offload_entry{ 3397 // void *addr; // Pointer to the offload entry info. 3398 // // (function or global) 3399 // char *name; // Name of the function or global. 3400 // size_t size; // Size of the entry info (0 if it a function). 3401 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3402 // int32_t reserved; // Reserved, to use by the runtime library. 
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is packed so its layout matches the runtime's expectation
    // exactly, independent of the target's default struct padding.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundles the AST nodes that describe one privatized variable in a
/// task-based region: the original reference/declaration and the generated
/// private copy (plus the per-element init variable for array inits).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor used for untied task locals: only the original decl is known.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // True for task-local variables (no clause-generated copy/init decls).
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \a VD must be allocated through an OpenMP allocator,
/// i.e. it carries an 'omp allocate' attribute with a non-default allocator.
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the implicit record that holds all privatized variables of a task,
/// one field per entry of \a Privates (in order). Returns null when there is
/// nothing to privatize.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes so the field gets the same
        // alignment the original variable had.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the record type for the runtime's kmp_task_t descriptor. Field
/// order must match the runtime ABI; taskloop directives get extra
/// bounds/stride/lastiter/reductions fields appended.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a destructor pointer and a priority value.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Builds the record that wraps the runtime task descriptor together with
/// the (optional) privates record: { kmp_task_t task_data; privates; }.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only present when something is actually privatized.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The runtime calls the proxy as (gtid, task descriptor *).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of the wrapper record is the kmp_task_t descriptor itself.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address so the outlined body can update it.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when something was privatized;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop variants additionally receive lb/ub/st/liter/reductions loaded
  // from the extended kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits the task destructor thunk invoked by the runtime: walks the fields
/// of the privates record and pushes a destroy cleanup for each field whose
/// type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of the wrapper record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a cleanup for every private field that needs destruction; the
    // cleanups run (in reverse order) when the function finishes.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its (1-based) out-parameter position.
  // Counter starts at 1 because position 0 is TaskPrivatesArg.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // Local privates with reference type (or allocator-managed storage) are
    // stored as pointers in the privates record; match that here.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This trivial mapping function should always be inlined when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Look up which out-parameter corresponds to this privates-record field.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    // Store the address of the privates-record field through the
    // caller-provided out-parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the wrapper record holds the privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates-record fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only non-trivial constructor inits need
    // re-running; trivial/firstprivate copies were done at creation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source value lives in the shareds area of the source task.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init element to point at
          // the shared value, then run the copy-initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// Returns true if any non-local private copy has a non-trivial constructor
/// initializer, i.e. the task_dup function must re-run initialization.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Task-local privates are never initialized here.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Runtime signature: (dst task, src task, lastprivate flag).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate copies are sourced from the *source* task's shareds area.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
/// RAII scope that materializes the loop nest for an OpenMP iterator
/// expression: the constructor privatizes each iterator variable and its
/// counter, then opens one counter-driven loop per iterator; the destructor
/// emits the counter increments, back-branches, and exit blocks in reverse
/// order. A null iterator expression makes both ends no-ops.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, filled by the constructor
  // and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Upper bounds are evaluated up front, before the loops are opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed vs unsigned compare from the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Returns the base address of \a E and the size in bytes of the data it
/// designates: for an array shaping expression, element size times the
/// product of all dimensions; for an array section, the byte distance from
/// the lower bound to one past the upper bound; otherwise the size of the
/// expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = (&section_upper + 1) - &section_lower, in bytes.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // Layout mirrors the runtime's kmp_task_affinity_info_t:
    // { intptr_t base_addr; size_t len; uint32 flags; }.
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits the bulk of task-creation codegen for a task/taskloop/target
/// directive: builds the kmp_task_t-with-privates record, the proxy entry
/// point, the __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc when
/// 'nowait' is present) call, handles detach and affinity clauses, copies
/// shareds, initializes private copies, and fills the destructors/priority
/// union fields.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps source order among equally-aligned entries.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; take its LLVM type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime condition (pointer) or a compile-time constant
  // (int in the PointerIntPair); pick the flag accordingly.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator-modified clauses contribute a runtime-computed count:
        // the product of all iterator upper bounds.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count present -> emit a VLA sized to static + dynamic parts.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static count -> a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-filled entries need a runtime position counter, seeded with
      // the number of statically-filled entries.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops also need a task_dup routine when lastprivates must be
    // propagated or privates need construction in the duplicated tasks.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    // Layout mirrors the runtime's kmp_depend_info:
    // { intptr_t base_addr; size_t len; <flags int> flags; }.
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns the number of dependence records stored in the depobj designated
/// by \a DepobjLVal (read from the record at index -1, where the count is
/// stashed in the base_addr field) together with an LValue for the first
/// real kmp_depend_info record.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element *before* the first record holds the bookkeeping data.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills \a DependenciesArray, starting at position \a Pos, with one
/// kmp_depend_info record (base_addr, len, flags) per dependence expression
/// in \a Data. \a Pos is either a compile-time index (unsigned*) or a
/// runtime counter LValue (used when the clause has an iterator modifier);
/// it is advanced past the written records in both cases.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the whole fill in the iterator loop nest (no-op without iterator).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For each depobj expression in \a Data, emits code that reads the number
/// of dependence records stored in that depobj (from the bookkeeping record
/// at index -1) and returns the resulting per-expression counts.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Sizes are accumulated into temporaries inside the iterator loop nest,
    // then read back after the scope closes the loops.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the dependence records out of every depobj expression in \a Data
/// into \a DependenciesArray at the runtime position \a PosLVal, advancing
/// the position past each copied batch.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Materializes the combined kmp_depend_info array for all dependency
  // clauses of a task and returns {number of elements, array address}.
  // Returns {nullptr, invalid} when there are no dependency expressions.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of "plain" deps only: depobj deps and iterator-driven
  // deps contribute a count known only at run time, so they are excluded here
  // and folded in as IR values below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Starts at 1 because iterator counts are accumulated multiplicatively.
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj stores its own element count; sum them at run time.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Multiply upper bounds of all iterators to get the iteration count.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time: emit a VLA sized by the sum of
    // the static count plus the dynamic depobj/iterator contributions.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // The VLA size expression is bound to the computed element count via an
    // opaque value.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a plain constant-sized local array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First pass: plain deps with a compile-time position counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Switch to a run-time position counter seeded with the static count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  // Allocates (via __kmpc_alloc) and fills the kmp_depend_info array backing
  // a depobj. The returned address points to element 1; element 0 stores the
  // element count in its base_addr field so later depobj consumers/destroyers
  // can recover it.
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: the element count is the product of all iterator upper
    // bounds, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the size-carrying first element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: size of kmp_depend_info[NumDependencies + 1] in bytes.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position starts at 1: element 0 holds the count. With iterators the
  // position must live in memory (run-time counter); otherwise a plain
  // unsigned suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the size element, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5045 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5046 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5047 Addr.getPointer(), 5048 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5049 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5050 CGF.VoidPtrTy); 5051 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5052 // Use default allocator. 5053 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5054 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5055 5056 // _kmpc_free(gtid, addr, nullptr); 5057 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5058 CGM.getModule(), OMPRTL___kmpc_free), 5059 Args); 5060 } 5061 5062 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5063 OpenMPDependClauseKind NewDepKind, 5064 SourceLocation Loc) { 5065 ASTContext &C = CGM.getContext(); 5066 QualType FlagsTy; 5067 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5068 RecordDecl *KmpDependInfoRD = 5069 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5070 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5071 llvm::Value *NumDeps; 5072 LValue Base; 5073 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5074 5075 Address Begin = Base.getAddress(CGF); 5076 // Cast from pointer to array type to pointer to single element. 5077 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5078 // The basic structure here is a while-do loop. 
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  // Emits a loop that rewrites the flags field of every kmp_depend_info
  // record in a depobj to the runtime encoding of \p NewDepKind
  // (implements 'depobj(x) update(kind)').
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover the element count and the address of the first real record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Current-element pointer is a PHI fed by the entry block and by the
  // advanced pointer from the previous iteration.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  // Emits the runtime call sequence for a 'task' directive: allocates the
  // task via emitTaskInit, then either enqueues it
  // (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause, runs it
  // immediately between task_begin_if0/task_complete_if0.
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; declared here so the lambda below
  // can capture it by reference.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' path: the 'if' clause is true (or absent) - enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start over from part id 0 on each resume.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Filled only when there are dependences; captured by reference below.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'Else' path: the 'if' clause is false - run the task body immediately on
  // the encountering thread (after waiting on its dependences).
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Emits a 'taskloop' directive: allocates the task, stores loop bounds,
  // stride and reduction data into the task record, then calls
  // __kmpc_taskloop.
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluate the 'if' clause condition, or pass 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task record from the directive's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // And the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling modifier encoding for the 'sched' argument below.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against zero-length arrays: skip the body entirely when empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Source element pointer PHI: entry value from EntryBB, advanced value
  // added after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Destination element pointer PHI, wired the same way.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so
  // the combiner expression operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edges use the builder's current block: the combiner may have
  // emitted additional blocks since BodyBB.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
If the combiner is a simple expression emit it as 5393 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5394 /// UDR combiner function. 5395 static void emitReductionCombiner(CodeGenFunction &CGF, 5396 const Expr *ReductionOp) { 5397 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5398 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5399 if (const auto *DRE = 5400 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5401 if (const auto *DRD = 5402 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5403 std::pair<llvm::Function *, llvm::Function *> Reduction = 5404 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5405 RValue Func = RValue::get(Reduction.first); 5406 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5407 CGF.EmitIgnoredExpr(ReductionOp); 5408 return; 5409 } 5410 CGF.EmitIgnoredExpr(ReductionOp); 5411 } 5412 5413 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5414 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5415 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5416 ArrayRef<const Expr *> ReductionOps) { 5417 ASTContext &C = CGM.getContext(); 5418 5419 // void reduction_func(void *LHSArg, void *RHSArg); 5420 FunctionArgList Args; 5421 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5422 ImplicitParamDecl::Other); 5423 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5424 ImplicitParamDecl::Other); 5425 Args.push_back(&LHSArg); 5426 Args.push_back(&RHSArg); 5427 const auto &CGFI = 5428 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5429 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5430 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5431 llvm::GlobalValue::InternalLinkage, Name, 5432 &CGM.getModule()); 5433 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5434 Fn->setDoesNotRecurse(); 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  // Builds the internal 'reduce_func(void *lhs, void *rhs)' passed to
  // __kmpc_reduce: both arguments are arrays of pointers to the reduction
  // variables; each element pair is combined with its reduction operation.
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays. Idx tracks the array slot; it can run ahead of I because VLA
  // entries occupy an extra slot for their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type: the size is smuggled through the
      // next array slot as a pointer-sized integer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner with the remapped variables in scope.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5512 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5513 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5514 EmitOMPAggregateReduction( 5515 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5516 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5517 emitReductionCombiner(CGF, ReductionOp); 5518 }); 5519 } else { 5520 // Emit reduction for array subscript or single variable. 5521 emitReductionCombiner(CGF, ReductionOp); 5522 } 5523 } 5524 5525 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5526 ArrayRef<const Expr *> Privates, 5527 ArrayRef<const Expr *> LHSExprs, 5528 ArrayRef<const Expr *> RHSExprs, 5529 ArrayRef<const Expr *> ReductionOps, 5530 ReductionOptionsTy Options) { 5531 if (!CGF.HaveInsertPoint()) 5532 return; 5533 5534 bool WithNowait = Options.WithNowait; 5535 bool SimpleReduction = Options.SimpleReduction; 5536 5537 // Next code should be emitted for reduction: 5538 // 5539 // static kmp_critical_name lock = { 0 }; 5540 // 5541 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5542 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5543 // ... 5544 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5545 // *(Type<n>-1*)rhs[<n>-1]); 5546 // } 5547 // 5548 // ... 5549 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5550 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5551 // RedList, reduce_func, &<lock>)) { 5552 // case 1: 5553 // ... 5554 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5555 // ... 5556 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5557 // break; 5558 // case 2: 5559 // ... 5560 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5561 // ... 5562 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5563 // break; 5564 // default:; 5565 // } 5566 // 5567 // if SimpleReduction is true, only the next code is generated: 5568 // ... 
5569 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5570 // ... 5571 5572 ASTContext &C = CGM.getContext(); 5573 5574 if (SimpleReduction) { 5575 CodeGenFunction::RunCleanupsScope Scope(CGF); 5576 auto IPriv = Privates.begin(); 5577 auto ILHS = LHSExprs.begin(); 5578 auto IRHS = RHSExprs.begin(); 5579 for (const Expr *E : ReductionOps) { 5580 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5581 cast<DeclRefExpr>(*IRHS)); 5582 ++IPriv; 5583 ++ILHS; 5584 ++IRHS; 5585 } 5586 return; 5587 } 5588 5589 // 1. Build a list of reduction variables. 5590 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5591 auto Size = RHSExprs.size(); 5592 for (const Expr *E : Privates) { 5593 if (E->getType()->isVariablyModifiedType()) 5594 // Reserve place for array size. 5595 ++Size; 5596 } 5597 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5598 QualType ReductionArrayTy = 5599 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5600 /*IndexTypeQuals=*/0); 5601 Address ReductionList = 5602 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5603 auto IPriv = Privates.begin(); 5604 unsigned Idx = 0; 5605 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5606 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5607 CGF.Builder.CreateStore( 5608 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5609 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5610 Elem); 5611 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5612 // Store array size. 5613 ++Idx; 5614 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5615 llvm::Value *Size = CGF.Builder.CreateIntCast( 5616 CGF.getVLASize( 5617 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5618 .NumElts, 5619 CGF.SizeTy, /*isSigned=*/false); 5620 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5621 Elem); 5622 } 5623 } 5624 5625 // 2. 
Emit reduce_func(). 5626 llvm::Function *ReductionFn = emitReductionFunction( 5627 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5628 LHSExprs, RHSExprs, ReductionOps); 5629 5630 // 3. Create static kmp_critical_name lock = { 0 }; 5631 std::string Name = getName({"reduction"}); 5632 llvm::Value *Lock = getCriticalRegionLock(Name); 5633 5634 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5635 // RedList, reduce_func, &<lock>); 5636 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5637 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5638 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5639 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5640 ReductionList.getPointer(), CGF.VoidPtrTy); 5641 llvm::Value *Args[] = { 5642 IdentTLoc, // ident_t *<loc> 5643 ThreadId, // i32 <gtid> 5644 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5645 ReductionArrayTySize, // size_type sizeof(RedList) 5646 RL, // void *RedList 5647 ReductionFn, // void (*) (void *, void *) <reduce_func> 5648 Lock // kmp_critical_name *&<lock> 5649 }; 5650 llvm::Value *Res = CGF.EmitRuntimeCall( 5651 OMPBuilder.getOrCreateRuntimeFunction( 5652 CGM.getModule(), 5653 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5654 Args); 5655 5656 // 5. Build switch(res) 5657 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5658 llvm::SwitchInst *SwInst = 5659 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5660 5661 // 6. Build case 1: 5662 // ... 5663 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5664 // ... 
5665 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5666 // break; 5667 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5668 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5669 CGF.EmitBlock(Case1BB); 5670 5671 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5672 llvm::Value *EndArgs[] = { 5673 IdentTLoc, // ident_t *<loc> 5674 ThreadId, // i32 <gtid> 5675 Lock // kmp_critical_name *&<lock> 5676 }; 5677 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5678 CodeGenFunction &CGF, PrePostActionTy &Action) { 5679 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5680 auto IPriv = Privates.begin(); 5681 auto ILHS = LHSExprs.begin(); 5682 auto IRHS = RHSExprs.begin(); 5683 for (const Expr *E : ReductionOps) { 5684 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5685 cast<DeclRefExpr>(*IRHS)); 5686 ++IPriv; 5687 ++ILHS; 5688 ++IRHS; 5689 } 5690 }; 5691 RegionCodeGenTy RCG(CodeGen); 5692 CommonActionTy Action( 5693 nullptr, llvm::None, 5694 OMPBuilder.getOrCreateRuntimeFunction( 5695 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5696 : OMPRTL___kmpc_end_reduce), 5697 EndArgs); 5698 RCG.setAction(Action); 5699 RCG(CGF); 5700 5701 CGF.EmitBranch(DefaultBB); 5702 5703 // 7. Build case 2: 5704 // ... 5705 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5706 // ... 
5707 // break; 5708 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5709 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5710 CGF.EmitBlock(Case2BB); 5711 5712 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5713 CodeGenFunction &CGF, PrePostActionTy &Action) { 5714 auto ILHS = LHSExprs.begin(); 5715 auto IRHS = RHSExprs.begin(); 5716 auto IPriv = Privates.begin(); 5717 for (const Expr *E : ReductionOps) { 5718 const Expr *XExpr = nullptr; 5719 const Expr *EExpr = nullptr; 5720 const Expr *UpExpr = nullptr; 5721 BinaryOperatorKind BO = BO_Comma; 5722 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5723 if (BO->getOpcode() == BO_Assign) { 5724 XExpr = BO->getLHS(); 5725 UpExpr = BO->getRHS(); 5726 } 5727 } 5728 // Try to emit update expression as a simple atomic. 5729 const Expr *RHSExpr = UpExpr; 5730 if (RHSExpr) { 5731 // Analyze RHS part of the whole expression. 5732 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5733 RHSExpr->IgnoreParenImpCasts())) { 5734 // If this is a conditional operator, analyze its condition for 5735 // min/max reduction operator. 
5736 RHSExpr = ACO->getCond(); 5737 } 5738 if (const auto *BORHS = 5739 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5740 EExpr = BORHS->getRHS(); 5741 BO = BORHS->getOpcode(); 5742 } 5743 } 5744 if (XExpr) { 5745 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5746 auto &&AtomicRedGen = [BO, VD, 5747 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5748 const Expr *EExpr, const Expr *UpExpr) { 5749 LValue X = CGF.EmitLValue(XExpr); 5750 RValue E; 5751 if (EExpr) 5752 E = CGF.EmitAnyExpr(EExpr); 5753 CGF.EmitOMPAtomicSimpleUpdateExpr( 5754 X, E, BO, /*IsXLHSInRHSPart=*/true, 5755 llvm::AtomicOrdering::Monotonic, Loc, 5756 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5757 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5758 PrivateScope.addPrivate( 5759 VD, [&CGF, VD, XRValue, Loc]() { 5760 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5761 CGF.emitOMPSimpleStore( 5762 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5763 VD->getType().getNonReferenceType(), Loc); 5764 return LHSTemp; 5765 }); 5766 (void)PrivateScope.Privatize(); 5767 return CGF.EmitAnyExpr(UpExpr); 5768 }); 5769 }; 5770 if ((*IPriv)->getType()->isArrayType()) { 5771 // Emit atomic reduction for array section. 5772 const auto *RHSVar = 5773 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5774 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5775 AtomicRedGen, XExpr, EExpr, UpExpr); 5776 } else { 5777 // Emit atomic reduction for array subscript or single variable. 5778 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5779 } 5780 } else { 5781 // Emit as a critical region. 
5782 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5783 const Expr *, const Expr *) { 5784 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5785 std::string Name = RT.getName({"atomic_reduction"}); 5786 RT.emitCriticalRegion( 5787 CGF, Name, 5788 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5789 Action.Enter(CGF); 5790 emitReductionCombiner(CGF, E); 5791 }, 5792 Loc); 5793 }; 5794 if ((*IPriv)->getType()->isArrayType()) { 5795 const auto *LHSVar = 5796 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5797 const auto *RHSVar = 5798 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5799 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5800 CritRedGen); 5801 } else { 5802 CritRedGen(CGF, nullptr, nullptr, nullptr); 5803 } 5804 } 5805 ++ILHS; 5806 ++IRHS; 5807 ++IPriv; 5808 } 5809 }; 5810 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5811 if (!WithNowait) { 5812 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5813 llvm::Value *EndArgs[] = { 5814 IdentTLoc, // ident_t *<loc> 5815 ThreadId, // i32 <gtid> 5816 Lock // kmp_critical_name *&<lock> 5817 }; 5818 CommonActionTy Action(nullptr, llvm::None, 5819 OMPBuilder.getOrCreateRuntimeFunction( 5820 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5821 EndArgs); 5822 AtomicRCG.setAction(Action); 5823 AtomicRCG(CGF); 5824 } else { 5825 AtomicRCG(CGF); 5826 } 5827 5828 CGF.EmitBranch(DefaultBB); 5829 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5830 } 5831 5832 /// Generates unique name for artificial threadprivate variables. 5833 /// Format is: <Prefix> "." 
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Emits initialization for task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/orig addresses, size, and the init/fini/comb helper functions) and
/// passes it to __kmpc_taskred_modifier_init or __kmpc_taskred_init. Returns
/// the taskgroup descriptor returned by the runtime, or nullptr if there is
/// nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (delayed) allocation by the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

/// Emits the finalization call for a task reduction with a 'task' modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

/// Stores the (non-constant) size of reduction item N into the artificial
/// threadprivate variable that the init/comb/fini helper functions read.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

/// Returns the address of the private copy of the given shared reduction item
/// inside the current taskgroup, as provided by the runtime.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // NOTE(review): the returned Address reuses the shared item's alignment for
  // the runtime-provided pointer — presumably valid because the private copy
  // mirrors the shared item's type; confirm against the runtime contract.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

/// Emits a '#pragma omp taskwait', either through the OpenMPIRBuilder or as a
/// direct __kmpc_omp_taskwait runtime call, then emits the untied-task switch
/// for the enclosing region if any.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body of an OpenMP directive that needs no function outlining by
/// running CodeGen inside an inlined region (InlinedOpenMPRegionRAII).
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // 'critical', 'master' and 'masked' do not get untied-switch emission.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values of the 'cncl_kind' argument passed to the cancellation runtime
/// entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the construct named in a 'cancel'/'cancellation point' directive to
/// the runtime's cancellation kind constant.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits '#pragma omp cancellation point': calls __kmpc_cancellationpoint and,
/// if it returns non-zero, exits the construct (with a cancellation barrier
/// for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits '#pragma omp cancel': calls __kmpc_cancel (guarded by the 'if'
/// clause condition when present) and exits the construct when cancellation
/// was activated.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Initializes each allocator on region entry.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Destroys each allocator on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Emits the outlined function for a 'target' directive, wrapping the codegen
/// with allocator init/fini actions for any 'uses_allocators' clauses that
/// carry allocator traits.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId =
getThreadID(CGF, Allocator->getExprLoc()); 6428 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6429 // Use default memspace handle. 6430 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6431 llvm::Value *NumTraits = llvm::ConstantInt::get( 6432 CGF.IntTy, cast<ConstantArrayType>( 6433 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6434 ->getSize() 6435 .getLimitedValue()); 6436 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6437 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6438 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6439 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6440 AllocatorTraitsLVal.getBaseInfo(), 6441 AllocatorTraitsLVal.getTBAAInfo()); 6442 llvm::Value *Traits = 6443 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6444 6445 llvm::Value *AllocatorVal = 6446 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6447 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6448 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6449 // Store to allocator. 
6450 CGF.EmitVarDecl(*cast<VarDecl>( 6451 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6452 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6453 AllocatorVal = 6454 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6455 Allocator->getType(), Allocator->getExprLoc()); 6456 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6457 } 6458 6459 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6460 const Expr *Allocator) { 6461 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6462 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6463 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6464 llvm::Value *AllocatorVal = 6465 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6466 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6467 CGF.getContext().VoidPtrTy, 6468 Allocator->getExprLoc()); 6469 (void)CGF.EmitRuntimeCall( 6470 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6471 OMPRTL___kmpc_destroy_allocator), 6472 {ThreadId, AllocatorVal}); 6473 } 6474 6475 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6476 const OMPExecutableDirective &D, StringRef ParentName, 6477 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6478 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6479 // Create a unique name for the entry function using the source location 6480 // information of the current target region. The name will be something like: 6481 // 6482 // __omp_offloading_DD_FFFF_PP_lBB 6483 // 6484 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6485 // mangled name of the function that encloses the target region and BB is the 6486 // line number of the target region. 
6487 6488 unsigned DeviceID; 6489 unsigned FileID; 6490 unsigned Line; 6491 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6492 Line); 6493 SmallString<64> EntryFnName; 6494 { 6495 llvm::raw_svector_ostream OS(EntryFnName); 6496 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6497 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6498 } 6499 6500 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6501 6502 CodeGenFunction CGF(CGM, true); 6503 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6504 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6505 6506 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6507 6508 // If this target outline function is not an offload entry, we don't need to 6509 // register it. 6510 if (!IsOffloadEntry) 6511 return; 6512 6513 // The target region ID is used by the runtime library to identify the current 6514 // target region, so it only has to be unique and not necessarily point to 6515 // anything. It could be the pointer to the outlined function that implements 6516 // the target region, but we aren't using that so that the compiler doesn't 6517 // need to keep that, and could therefore inline the host function if proven 6518 // worthwhile during optimization. In the other hand, if emitting code for the 6519 // device, the ID has to be the function address so that it can retrieved from 6520 // the offloading entry and launched by the runtime library. We also mark the 6521 // outlined function to have external linkage in case we are emitting code for 6522 // the device, because these functions will be entry points to the device. 
6523 6524 if (CGM.getLangOpts().OpenMPIsDevice) { 6525 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6526 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6527 OutlinedFn->setDSOLocal(false); 6528 if (CGM.getTriple().isAMDGCN()) 6529 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6530 } else { 6531 std::string Name = getName({EntryFnName, "region_id"}); 6532 OutlinedFnID = new llvm::GlobalVariable( 6533 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6534 llvm::GlobalValue::WeakAnyLinkage, 6535 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6536 } 6537 6538 // Register the information for the entry associated with this target region. 6539 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6540 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6541 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6542 } 6543 6544 /// Checks if the expression is constant or does not have non-trivial function 6545 /// calls. 6546 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6547 // We can skip constant expressions. 6548 // We can skip expressions with trivial calls or simple expressions. 6549 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6550 !E->hasNonTrivialCall(Ctx)) && 6551 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6552 } 6553 6554 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6555 const Stmt *Body) { 6556 const Stmt *Child = Body->IgnoreContainers(); 6557 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6558 Child = nullptr; 6559 for (const Stmt *S : C->body()) { 6560 if (const auto *E = dyn_cast<Expr>(S)) { 6561 if (isTrivial(Ctx, E)) 6562 continue; 6563 } 6564 // Some of the statements can be ignored. 6565 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6566 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6567 continue; 6568 // Analyze declarations. 
6569 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6570 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6571 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6572 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6573 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6574 isa<UsingDirectiveDecl>(D) || 6575 isa<OMPDeclareReductionDecl>(D) || 6576 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6577 return true; 6578 const auto *VD = dyn_cast<VarDecl>(D); 6579 if (!VD) 6580 return false; 6581 return VD->hasGlobalStorage() || !VD->isUsed(); 6582 })) 6583 continue; 6584 } 6585 // Found multiple children - cannot get the one child only. 6586 if (Child) 6587 return nullptr; 6588 Child = S; 6589 } 6590 if (Child) 6591 Child = Child->IgnoreContainers(); 6592 } 6593 return Child; 6594 } 6595 6596 /// Emit the number of teams for a target directive. Inspect the num_teams 6597 /// clause associated with a teams construct combined or closely nested 6598 /// with the target directive. 6599 /// 6600 /// Emit a team of size one for directives such as 'target parallel' that 6601 /// have no associated teams construct. 6602 /// 6603 /// Otherwise, return nullptr. 
6604 static llvm::Value * 6605 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6606 const OMPExecutableDirective &D) { 6607 assert(!CGF.getLangOpts().OpenMPIsDevice && 6608 "Clauses associated with the teams directive expected to be emitted " 6609 "only for the host!"); 6610 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6611 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6612 "Expected target-based executable directive."); 6613 CGBuilderTy &Bld = CGF.Builder; 6614 switch (DirectiveKind) { 6615 case OMPD_target: { 6616 const auto *CS = D.getInnermostCapturedStmt(); 6617 const auto *Body = 6618 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6619 const Stmt *ChildStmt = 6620 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6621 if (const auto *NestedDir = 6622 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6623 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6624 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6625 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6626 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6627 const Expr *NumTeams = 6628 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6629 llvm::Value *NumTeamsVal = 6630 CGF.EmitScalarExpr(NumTeams, 6631 /*IgnoreResultAssign*/ true); 6632 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6633 /*isSigned=*/true); 6634 } 6635 return Bld.getInt32(0); 6636 } 6637 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6638 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6639 return Bld.getInt32(1); 6640 return Bld.getInt32(0); 6641 } 6642 return nullptr; 6643 } 6644 case OMPD_target_teams: 6645 case OMPD_target_teams_distribute: 6646 case OMPD_target_teams_distribute_simd: 6647 case OMPD_target_teams_distribute_parallel_for: 6648 case OMPD_target_teams_distribute_parallel_for_simd: { 6649 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6650 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6651 const Expr *NumTeams = 6652 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6653 llvm::Value *NumTeamsVal = 6654 CGF.EmitScalarExpr(NumTeams, 6655 /*IgnoreResultAssign*/ true); 6656 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6657 /*isSigned=*/true); 6658 } 6659 return Bld.getInt32(0); 6660 } 6661 case OMPD_target_parallel: 6662 case OMPD_target_parallel_for: 6663 case OMPD_target_parallel_for_simd: 6664 case OMPD_target_simd: 6665 return Bld.getInt32(1); 6666 case OMPD_parallel: 6667 case OMPD_for: 6668 case OMPD_parallel_for: 6669 case OMPD_parallel_master: 6670 case OMPD_parallel_sections: 6671 case OMPD_for_simd: 6672 case OMPD_parallel_for_simd: 6673 case OMPD_cancel: 6674 case OMPD_cancellation_point: 6675 case OMPD_ordered: 6676 case OMPD_threadprivate: 6677 case OMPD_allocate: 6678 case OMPD_task: 6679 case OMPD_simd: 6680 case OMPD_tile: 6681 case OMPD_unroll: 6682 case OMPD_sections: 6683 case OMPD_section: 6684 case OMPD_single: 6685 case OMPD_master: 6686 case OMPD_critical: 6687 case OMPD_taskyield: 6688 case OMPD_barrier: 6689 case OMPD_taskwait: 6690 case OMPD_taskgroup: 6691 case OMPD_atomic: 6692 case OMPD_flush: 6693 case OMPD_depobj: 6694 case OMPD_scan: 6695 case OMPD_teams: 6696 case OMPD_target_data: 6697 case OMPD_target_exit_data: 6698 case OMPD_target_enter_data: 6699 case OMPD_distribute: 6700 case OMPD_distribute_simd: 6701 case OMPD_distribute_parallel_for: 6702 case OMPD_distribute_parallel_for_simd: 6703 case OMPD_teams_distribute: 6704 case OMPD_teams_distribute_simd: 6705 case OMPD_teams_distribute_parallel_for: 6706 case OMPD_teams_distribute_parallel_for_simd: 6707 case OMPD_target_update: 6708 case OMPD_declare_simd: 6709 case OMPD_declare_variant: 6710 case OMPD_begin_declare_variant: 6711 case OMPD_end_declare_variant: 6712 case OMPD_declare_target: 6713 case OMPD_end_declare_target: 6714 case OMPD_declare_reduction: 6715 case OMPD_declare_mapper: 6716 case OMPD_taskloop: 6717 case 
OMPD_taskloop_simd: 6718 case OMPD_master_taskloop: 6719 case OMPD_master_taskloop_simd: 6720 case OMPD_parallel_master_taskloop: 6721 case OMPD_parallel_master_taskloop_simd: 6722 case OMPD_requires: 6723 case OMPD_unknown: 6724 break; 6725 default: 6726 break; 6727 } 6728 llvm_unreachable("Unexpected directive kind."); 6729 } 6730 6731 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6732 llvm::Value *DefaultThreadLimitVal) { 6733 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6734 CGF.getContext(), CS->getCapturedStmt()); 6735 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6736 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6737 llvm::Value *NumThreads = nullptr; 6738 llvm::Value *CondVal = nullptr; 6739 // Handle if clause. If if clause present, the number of threads is 6740 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6741 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6742 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6743 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6744 const OMPIfClause *IfClause = nullptr; 6745 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6746 if (C->getNameModifier() == OMPD_unknown || 6747 C->getNameModifier() == OMPD_parallel) { 6748 IfClause = C; 6749 break; 6750 } 6751 } 6752 if (IfClause) { 6753 const Expr *Cond = IfClause->getCondition(); 6754 bool Result; 6755 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6756 if (!Result) 6757 return CGF.Builder.getInt32(1); 6758 } else { 6759 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6760 if (const auto *PreInit = 6761 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6762 for (const auto *I : PreInit->decls()) { 6763 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6764 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6765 } else { 6766 CodeGenFunction::AutoVarEmission Emission = 6767 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6768 
CGF.EmitAutoVarCleanups(Emission); 6769 } 6770 } 6771 } 6772 CondVal = CGF.EvaluateExprAsBool(Cond); 6773 } 6774 } 6775 } 6776 // Check the value of num_threads clause iff if clause was not specified 6777 // or is not evaluated to false. 6778 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6779 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6780 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6781 const auto *NumThreadsClause = 6782 Dir->getSingleClause<OMPNumThreadsClause>(); 6783 CodeGenFunction::LexicalScope Scope( 6784 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6785 if (const auto *PreInit = 6786 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6787 for (const auto *I : PreInit->decls()) { 6788 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6789 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6790 } else { 6791 CodeGenFunction::AutoVarEmission Emission = 6792 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6793 CGF.EmitAutoVarCleanups(Emission); 6794 } 6795 } 6796 } 6797 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6798 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6799 /*isSigned=*/false); 6800 if (DefaultThreadLimitVal) 6801 NumThreads = CGF.Builder.CreateSelect( 6802 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6803 DefaultThreadLimitVal, NumThreads); 6804 } else { 6805 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6806 : CGF.Builder.getInt32(0); 6807 } 6808 // Process condition of the if clause. 6809 if (CondVal) { 6810 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6811 CGF.Builder.getInt32(1)); 6812 } 6813 return NumThreads; 6814 } 6815 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6816 return CGF.Builder.getInt32(1); 6817 return DefaultThreadLimitVal; 6818 } 6819 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6820 : CGF.Builder.getInt32(0); 6821 } 6822 6823 /// Emit the number of threads for a target directive. 
/// Inspect the thread_limit clause associated with a teams construct combined
/// or closely nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look through the captured region for a nested directive carrying a
    // thread_limit / num_threads clause.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit declarations captured by the clause's pre-init statement;
        // OMPCaptureNoInitAttr decls are allocated without initialization.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend into a teams region (that is not also distribute) to find the
      // directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute' may itself contain the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: serialized execution, a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
7086 OMP_MAP_PRIVATE = 0x80, 7087 /// Pass the element to the device by value. 7088 OMP_MAP_LITERAL = 0x100, 7089 /// Implicit map 7090 OMP_MAP_IMPLICIT = 0x200, 7091 /// Close is a hint to the runtime to allocate memory close to 7092 /// the target device. 7093 OMP_MAP_CLOSE = 0x400, 7094 /// 0x800 is reserved for compatibility with XLC. 7095 /// Produce a runtime error if the data is not already allocated. 7096 OMP_MAP_PRESENT = 0x1000, 7097 /// Signal that the runtime library should use args as an array of 7098 /// descriptor_dim pointers and use args_size as dims. Used when we have 7099 /// non-contiguous list items in target update directive 7100 OMP_MAP_NON_CONTIG = 0x100000000000, 7101 /// The 16 MSBs of the flags indicate whether the entry is member of some 7102 /// struct/class. 7103 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7104 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7105 }; 7106 7107 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7108 static unsigned getFlagMemberOffset() { 7109 unsigned Offset = 0; 7110 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7111 Remain = Remain >> 1) 7112 Offset++; 7113 return Offset; 7114 } 7115 7116 /// Class that holds debugging information for a data mapping to be passed to 7117 /// the runtime library. 7118 class MappingExprInfo { 7119 /// The variable declaration used for the data mapping. 7120 const ValueDecl *MapDecl = nullptr; 7121 /// The original expression used in the map clause, or null if there is 7122 /// none. 7123 const Expr *MapExpr = nullptr; 7124 7125 public: 7126 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7127 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7128 7129 const ValueDecl *getMapDecl() const { return MapDecl; } 7130 const Expr *getMapExpr() const { return MapExpr; } 7131 }; 7132 7133 /// Class that associates information with a base pointer to be passed to the 7134 /// runtime library. 
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the wrapped base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information. The parallel arrays are kept in
  /// lock-step: entry I of each array describes the same mapped component.
  struct MapCombinedInfoTy {
    /// Dimension/offset/count/stride descriptors for non-contiguous mappings.
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo. Every parallel array (including the
    /// non-contiguous descriptors) is extended so the lock-step invariant
    /// is preserved.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single component list of a map-like clause
  /// (map type, modifiers, and related bookkeeping).
struct MapInfo {
  // Component list (base expression through member/subscript accesses) the
  // map item was built from.
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  // Map type of the clause (to/from/tofrom/alloc/release/delete).
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  // Map-type modifiers attached to the clause (always, close, present, ...).
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  // Motion modifiers from 'to'/'from' clauses of a target update directive.
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  // True if a device-pointer entry must be returned for this item
  // (use_device_ptr/use_device_addr).
  bool ReturnDevicePointer = false;
  // True if the mapping was not written explicitly by the user.
  bool IsImplicit = false;
  // User-defined mapper associated with this item, if any.
  const ValueDecl *Mapper = nullptr;
  // Original map-clause expression, kept for debug/diagnostic information.
  const Expr *VarRef = nullptr;
  // True when the entry originates from use_device_addr rather than
  // use_device_ptr.
  bool ForDeviceAddr = false;

  MapInfo() = default;
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  // Expression naming the deferred device pointer/address.
  const Expr *IE = nullptr;
  // Declaration the expression refers to.
  const ValueDecl *VD = nullptr;
  // True for use_device_addr entries, false for use_device_ptr.
  bool ForDeviceAddr = false;

  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                           bool ForDeviceAddr)
      : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either a executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
CodeGenFunction &CGF;

/// Set of all first private variables in the current directive.
/// bool data is set to true if the variable is implicitly marked as
/// firstprivate, false otherwise.
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
llvm::DenseMap<
    const ValueDecl *,
    SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
    DevPointersMap;

/// Compute the runtime size, in bytes, of the object designated by \p E.
/// Array shaping expressions and array sections get dedicated handling;
/// for reference-typed expressions the referenced object's size is used.
/// \return an llvm::Value holding the size (target size type).
llvm::Value *getExprTypeSize(const Expr *E) const {
  QualType ExprTy = E->getType().getCanonicalType();

  // Calculate the size for array shaping expression.
  if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
    // Size = element size * product of all dimension extents.
    llvm::Value *Size =
        CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OAE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Normalize each extent to the target size type before multiplying.
      Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                    CGF.getContext().getSizeType(),
                                    SE->getExprLoc());
      Size = CGF.Builder.CreateNUWMul(Size, Sz);
    }
    return Size;
  }

  // Reference types are ignored for mapping purposes.
  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();

  // Given that an array section is considered a built-in type, we need to
  // do the calculation based on the length of the section instead of relying
  // on CGF.getTypeSize(E->getType()).
  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
    QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                          OAE->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();

    // If there is no length associated with the expression and lower bound is
    // not specified too, that means we are using the whole length of the
    // base.
    if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
        !OAE->getLowerBound())
      return CGF.getTypeSize(BaseTy);

    llvm::Value *ElemSize;
    if (const auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
    } else {
      const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
    }

    // If we don't have a length at this point, that is because we have an
    // array section with a single element.
    if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
      return ElemSize;

    // Explicit length: Size = length * element size.
    if (const Expr *LenExpr = OAE->getLength()) {
      llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
      LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                           CGF.getContext().getSizeType(),
                                           LenExpr->getExprLoc());
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
           OAE->getLowerBound() && "expected array_section[lb:].");
    // Size = sizetype - lb * elemtype;
    llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
    llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
    LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                     CGF.getContext().getSizeType(),
                                     OAE->getLowerBound()->getExprLoc());
    LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
    // Clamp the result at zero: the NUW subtraction below must not be
    // executed when the lower-bound offset exceeds the total size.
    llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
    llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
    LengthVal = CGF.Builder.CreateSelect(
        Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
    return LengthVal;
  }
  return CGF.getTypeSize(ExprTy);
}

/// Return the corresponding bits for a given map clause modifier.
Add 7351 /// a flag marking the map as a pointer if requested. Add a flag marking the 7352 /// map as the first one of a series of maps that relate to the same map 7353 /// expression. 7354 OpenMPOffloadMappingFlags getMapTypeBits( 7355 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7356 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7357 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7358 OpenMPOffloadMappingFlags Bits = 7359 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7360 switch (MapType) { 7361 case OMPC_MAP_alloc: 7362 case OMPC_MAP_release: 7363 // alloc and release is the default behavior in the runtime library, i.e. 7364 // if we don't pass any bits alloc/release that is what the runtime is 7365 // going to do. Therefore, we don't need to signal anything for these two 7366 // type modifiers. 7367 break; 7368 case OMPC_MAP_to: 7369 Bits |= OMP_MAP_TO; 7370 break; 7371 case OMPC_MAP_from: 7372 Bits |= OMP_MAP_FROM; 7373 break; 7374 case OMPC_MAP_tofrom: 7375 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7376 break; 7377 case OMPC_MAP_delete: 7378 Bits |= OMP_MAP_DELETE; 7379 break; 7380 case OMPC_MAP_unknown: 7381 llvm_unreachable("Unexpected map type!"); 7382 } 7383 if (AddPtrFlag) 7384 Bits |= OMP_MAP_PTR_AND_OBJ; 7385 if (AddIsTargetParamFlag) 7386 Bits |= OMP_MAP_TARGET_PARAM; 7387 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7388 != MapModifiers.end()) 7389 Bits |= OMP_MAP_ALWAYS; 7390 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7391 != MapModifiers.end()) 7392 Bits |= OMP_MAP_CLOSE; 7393 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7394 MapModifiers.end() || 7395 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7396 MotionModifiers.end()) 7397 Bits |= OMP_MAP_PRESENT; 7398 if (IsNonContiguous) 7399 Bits |= OMP_MAP_NON_CONTIG; 7400 return Bits; 7401 } 7402 7403 /// Return true if the provided expression is a final array section. 
A 7404 /// final array section, is one whose length can't be proved to be one. 7405 bool isFinalArraySectionExpression(const Expr *E) const { 7406 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7407 7408 // It is not an array section and therefore not a unity-size one. 7409 if (!OASE) 7410 return false; 7411 7412 // An array section with no colon always refer to a single element. 7413 if (OASE->getColonLocFirst().isInvalid()) 7414 return false; 7415 7416 const Expr *Length = OASE->getLength(); 7417 7418 // If we don't have a length we have to check if the array has size 1 7419 // for this dimension. Also, we should always expect a length if the 7420 // base type is pointer. 7421 if (!Length) { 7422 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7423 OASE->getBase()->IgnoreParenImpCasts()) 7424 .getCanonicalType(); 7425 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7426 return ATy->getSize().getSExtValue() != 1; 7427 // If we don't have a constant dimension length, we have to consider 7428 // the current section as having any size, so it is not necessarily 7429 // unitary. If it happen to be unity size, that's user fault. 7430 return true; 7431 } 7432 7433 // Check if the length evaluates to 1. 7434 Expr::EvalResult Result; 7435 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7436 return true; // Can have more that size 1. 7437 7438 llvm::APSInt ConstLength = Result.Val.getInt(); 7439 return ConstLength.getSExtValue() != 1; 7440 } 7441 7442 /// Generate the base pointers, section pointers, sizes, map type bits, and 7443 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7444 /// map type, map or motion modifiers, and expression components. 7445 /// \a IsFirstComponent should be set to true if the provided set of 7446 /// components is the first associated with a capture. 
7447 void generateInfoForComponentList( 7448 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7449 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7450 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7451 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7452 bool IsFirstComponentList, bool IsImplicit, 7453 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7454 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7455 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7456 OverlappedElements = llvm::None) const { 7457 // The following summarizes what has to be generated for each map and the 7458 // types below. The generated information is expressed in this order: 7459 // base pointer, section pointer, size, flags 7460 // (to add to the ones that come from the map type and modifier). 7461 // 7462 // double d; 7463 // int i[100]; 7464 // float *p; 7465 // 7466 // struct S1 { 7467 // int i; 7468 // float f[50]; 7469 // } 7470 // struct S2 { 7471 // int i; 7472 // float f[50]; 7473 // S1 s; 7474 // double *p; 7475 // struct S2 *ps; 7476 // int &ref; 7477 // } 7478 // S2 s; 7479 // S2 *ps; 7480 // 7481 // map(d) 7482 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7483 // 7484 // map(i) 7485 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7486 // 7487 // map(i[1:23]) 7488 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7489 // 7490 // map(p) 7491 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7492 // 7493 // map(p[1:24]) 7494 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7495 // in unified shared memory mode or for local pointers 7496 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7497 // 7498 // map(s) 7499 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7500 // 7501 // map(s.i) 7502 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7503 // 7504 // map(s.s.f) 7505 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7506 // 7507 // map(s.p) 7508 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7509 // 7510 // map(to: s.p[:22]) 7511 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7512 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7513 // &(s.p), &(s.p[0]), 22*sizeof(double), 7514 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7515 // (*) alloc space for struct members, only this is a target parameter 7516 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7517 // optimizes this entry out, same in the examples below) 7518 // (***) map the pointee (map: to) 7519 // 7520 // map(to: s.ref) 7521 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7522 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7523 // (*) alloc space for struct members, only this is a target parameter 7524 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7525 // optimizes this entry out, same in the examples below) 7526 // (***) map the pointee (map: to) 7527 // 7528 // map(s.ps) 7529 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7530 // 7531 // map(from: s.ps->s.i) 7532 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7533 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7534 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7535 // 7536 // map(to: s.ps->ps) 7537 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7538 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7539 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7540 // 7541 // map(s.ps->ps->ps) 7542 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7543 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7544 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7545 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7546 // 7547 // map(to: s.ps->ps->s.f[:22]) 7548 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7549 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7550 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7551 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7552 // 7553 // map(ps) 7554 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7555 // 7556 // map(ps->i) 7557 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7558 // 7559 // map(ps->s.f) 7560 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7561 // 7562 // map(from: ps->p) 7563 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7564 // 7565 // map(to: ps->p[:22]) 7566 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7567 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7568 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7569 // 7570 // map(ps->ps) 7571 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7572 // 7573 // map(from: ps->ps->s.i) 7574 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7575 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7576 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7577 // 7578 // map(from: ps->ps->ps) 7579 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7580 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7581 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7582 // 7583 // map(ps->ps->ps->ps) 7584 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7585 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7586 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7587 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7588 // 7589 // map(to: ps->ps->ps->s.f[:22]) 7590 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7591 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7592 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7593 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7594 // 7595 // map(to: s.f[:22]) map(from: s.p[:33]) 7596 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7597 // sizeof(double*) (**), TARGET_PARAM 7598 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7599 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7600 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7601 // (*) allocate contiguous space needed to fit all mapped members even if 7602 // we allocate space for members not mapped (in this example, 7603 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7604 // them as well because they fall between &s.f[0] and &s.p) 7605 // 7606 // map(from: s.f[:22]) map(to: ps->p[:33]) 7607 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7608 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7609 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7610 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7611 // (*) the struct this entry pertains to is the 2nd element in the list of 7612 // arguments, hence MEMBER_OF(2) 7613 // 7614 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7615 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7616 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7617 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7618 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7619 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7620 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7621 // (*) the struct this entry pertains to is the 4th element in the list 7622 // of arguments, hence MEMBER_OF(4) 7623 7624 // Track if the map information being generated is the first for a capture. 7625 bool IsCaptureFirstInfo = IsFirstComponentList; 7626 // When the variable is on a declare target link or in a to clause with 7627 // unified memory, a reference is needed to hold the host/device address 7628 // of the variable. 7629 bool RequiresReference = false; 7630 7631 // Scan the components from the base to the complete expression. 
7632 auto CI = Components.rbegin(); 7633 auto CE = Components.rend(); 7634 auto I = CI; 7635 7636 // Track if the map information being generated is the first for a list of 7637 // components. 7638 bool IsExpressionFirstInfo = true; 7639 bool FirstPointerInComplexData = false; 7640 Address BP = Address::invalid(); 7641 const Expr *AssocExpr = I->getAssociatedExpression(); 7642 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7643 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7644 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7645 7646 if (isa<MemberExpr>(AssocExpr)) { 7647 // The base is the 'this' pointer. The content of the pointer is going 7648 // to be the base of the field being mapped. 7649 BP = CGF.LoadCXXThisAddress(); 7650 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7651 (OASE && 7652 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7653 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7654 } else if (OAShE && 7655 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7656 BP = Address( 7657 CGF.EmitScalarExpr(OAShE->getBase()), 7658 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7659 } else { 7660 // The base is the reference to the variable. 7661 // BP = &Var. 7662 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7663 if (const auto *VD = 7664 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7665 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7666 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7667 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7668 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7669 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7670 RequiresReference = true; 7671 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7672 } 7673 } 7674 } 7675 7676 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7677 // the last component), the base has to be the pointer itself, not its 7678 // reference. References are ignored for mapping purposes. 7679 QualType Ty = 7680 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7681 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7682 // No need to generate individual map information for the pointer, it 7683 // can be associated with the combined storage if shared memory mode is 7684 // active or the base declaration is not global variable. 7685 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7686 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7687 !VD || VD->hasLocalStorage()) 7688 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7689 else 7690 FirstPointerInComplexData = true; 7691 ++I; 7692 } 7693 } 7694 7695 // Track whether a component of the list should be marked as MEMBER_OF some 7696 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7697 // in a component list should be marked as MEMBER_OF, all subsequent entries 7698 // do not belong to the base struct. E.g. 7699 // struct S2 s; 7700 // s.ps->ps->ps->f[:] 7701 // (1) (2) (3) (4) 7702 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7703 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7704 // is the pointee of ps(2) which is not member of struct s, so it should not 7705 // be marked as such (it is still PTR_AND_OBJ). 7706 // The variable is initialized to false so that PTR_AND_OBJ entries which 7707 // are not struct members are not considered (e.g. array of pointers to 7708 // data). 7709 bool ShouldBeMemberOf = false; 7710 7711 // Variable keeping track of whether or not we have encountered a component 7712 // in the component list which is a member expression. 
Useful when we have a 7713 // pointer or a final array section, in which case it is the previous 7714 // component in the list which tells us whether we have a member expression. 7715 // E.g. X.f[:] 7716 // While processing the final array section "[:]" it is "f" which tells us 7717 // whether we are dealing with a member of a declared struct. 7718 const MemberExpr *EncounteredME = nullptr; 7719 7720 // Track for the total number of dimension. Start from one for the dummy 7721 // dimension. 7722 uint64_t DimSize = 1; 7723 7724 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7725 bool IsPrevMemberReference = false; 7726 7727 for (; I != CE; ++I) { 7728 // If the current component is member of a struct (parent struct) mark it. 7729 if (!EncounteredME) { 7730 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7731 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7732 // as MEMBER_OF the parent struct. 7733 if (EncounteredME) { 7734 ShouldBeMemberOf = true; 7735 // Do not emit as complex pointer if this is actually not array-like 7736 // expression. 7737 if (FirstPointerInComplexData) { 7738 QualType Ty = std::prev(I) 7739 ->getAssociatedDeclaration() 7740 ->getType() 7741 .getNonReferenceType(); 7742 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7743 FirstPointerInComplexData = false; 7744 } 7745 } 7746 } 7747 7748 auto Next = std::next(I); 7749 7750 // We need to generate the addresses and sizes if this is the last 7751 // component, if the component is a pointer or if it is an array section 7752 // whose length can't be proved to be one. If this is a pointer, it 7753 // becomes the base address for the following components. 7754 7755 // A final array section, is one whose length can't be proved to be one. 7756 // If the map item is non-contiguous then we don't treat any array section 7757 // as final array section. 
7758 bool IsFinalArraySection = 7759 !IsNonContiguous && 7760 isFinalArraySectionExpression(I->getAssociatedExpression()); 7761 7762 // If we have a declaration for the mapping use that, otherwise use 7763 // the base declaration of the map clause. 7764 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7765 ? I->getAssociatedDeclaration() 7766 : BaseDecl; 7767 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() 7768 : MapExpr; 7769 7770 // Get information on whether the element is a pointer. Have to do a 7771 // special treatment for array sections given that they are built-in 7772 // types. 7773 const auto *OASE = 7774 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7775 const auto *OAShE = 7776 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7777 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7778 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7779 bool IsPointer = 7780 OAShE || 7781 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7782 .getCanonicalType() 7783 ->isAnyPointerType()) || 7784 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7785 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7786 MapDecl && 7787 MapDecl->getType()->isLValueReferenceType(); 7788 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7789 7790 if (OASE) 7791 ++DimSize; 7792 7793 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7794 IsFinalArraySection) { 7795 // If this is not the last component, we expect the pointer to be 7796 // associated with an array expression or member expression. 
7797 assert((Next == CE || 7798 isa<MemberExpr>(Next->getAssociatedExpression()) || 7799 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7800 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7801 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7802 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7803 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7804 "Unexpected expression"); 7805 7806 Address LB = Address::invalid(); 7807 Address LowestElem = Address::invalid(); 7808 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7809 const MemberExpr *E) { 7810 const Expr *BaseExpr = E->getBase(); 7811 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7812 // scalar. 7813 LValue BaseLV; 7814 if (E->isArrow()) { 7815 LValueBaseInfo BaseInfo; 7816 TBAAAccessInfo TBAAInfo; 7817 Address Addr = 7818 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7819 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7820 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7821 } else { 7822 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7823 } 7824 return BaseLV; 7825 }; 7826 if (OAShE) { 7827 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7828 CGF.getContext().getTypeAlignInChars( 7829 OAShE->getBase()->getType())); 7830 } else if (IsMemberReference) { 7831 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7832 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7833 LowestElem = CGF.EmitLValueForFieldInitialization( 7834 BaseLVal, cast<FieldDecl>(MapDecl)) 7835 .getAddress(CGF); 7836 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7837 .getAddress(CGF); 7838 } else { 7839 LowestElem = LB = 7840 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7841 .getAddress(CGF); 7842 } 7843 7844 // If this component is a pointer inside the base struct then we don't 7845 // need to create any entry for it - it will be combined with the object 7846 // it is 
pointing to into a single PTR_AND_OBJ entry. 7847 bool IsMemberPointerOrAddr = 7848 EncounteredME && 7849 (((IsPointer || ForDeviceAddr) && 7850 I->getAssociatedExpression() == EncounteredME) || 7851 (IsPrevMemberReference && !IsPointer) || 7852 (IsMemberReference && Next != CE && 7853 !Next->getAssociatedExpression()->getType()->isPointerType())); 7854 if (!OverlappedElements.empty() && Next == CE) { 7855 // Handle base element with the info for overlapped elements. 7856 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7857 assert(!IsPointer && 7858 "Unexpected base element with the pointer type."); 7859 // Mark the whole struct as the struct that requires allocation on the 7860 // device. 7861 PartialStruct.LowestElem = {0, LowestElem}; 7862 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7863 I->getAssociatedExpression()->getType()); 7864 Address HB = CGF.Builder.CreateConstGEP( 7865 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7866 CGF.VoidPtrTy), 7867 TypeSize.getQuantity() - 1); 7868 PartialStruct.HighestElem = { 7869 std::numeric_limits<decltype( 7870 PartialStruct.HighestElem.first)>::max(), 7871 HB}; 7872 PartialStruct.Base = BP; 7873 PartialStruct.LB = LB; 7874 assert( 7875 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7876 "Overlapped elements must be used only once for the variable."); 7877 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7878 // Emit data for non-overlapped data. 7879 OpenMPOffloadMappingFlags Flags = 7880 OMP_MAP_MEMBER_OF | 7881 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7882 /*AddPtrFlag=*/false, 7883 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7884 llvm::Value *Size = nullptr; 7885 // Do bitcopy of all non-overlapped structure elements. 
7886 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7887 Component : OverlappedElements) { 7888 Address ComponentLB = Address::invalid(); 7889 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7890 Component) { 7891 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7892 const auto *FD = dyn_cast<FieldDecl>(VD); 7893 if (FD && FD->getType()->isLValueReferenceType()) { 7894 const auto *ME = 7895 cast<MemberExpr>(MC.getAssociatedExpression()); 7896 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7897 ComponentLB = 7898 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7899 .getAddress(CGF); 7900 } else { 7901 ComponentLB = 7902 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7903 .getAddress(CGF); 7904 } 7905 Size = CGF.Builder.CreatePtrDiff( 7906 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7907 CGF.EmitCastToVoidPtr(LB.getPointer())); 7908 break; 7909 } 7910 } 7911 assert(Size && "Failed to determine structure size"); 7912 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7913 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7914 CombinedInfo.Pointers.push_back(LB.getPointer()); 7915 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7916 Size, CGF.Int64Ty, /*isSigned=*/true)); 7917 CombinedInfo.Types.push_back(Flags); 7918 CombinedInfo.Mappers.push_back(nullptr); 7919 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7920 : 1); 7921 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7922 } 7923 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7924 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7925 CombinedInfo.Pointers.push_back(LB.getPointer()); 7926 Size = CGF.Builder.CreatePtrDiff( 7927 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7928 CGF.EmitCastToVoidPtr(LB.getPointer())); 7929 CombinedInfo.Sizes.push_back( 7930 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7931 CombinedInfo.Types.push_back(Flags); 7932 CombinedInfo.Mappers.push_back(nullptr); 7933 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7934 : 1); 7935 break; 7936 } 7937 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7938 if (!IsMemberPointerOrAddr || 7939 (Next == CE && MapType != OMPC_MAP_unknown)) { 7940 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7941 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7942 CombinedInfo.Pointers.push_back(LB.getPointer()); 7943 CombinedInfo.Sizes.push_back( 7944 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7945 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7946 : 1); 7947 7948 // If Mapper is valid, the last component inherits the mapper. 7949 bool HasMapper = Mapper && Next == CE; 7950 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7951 7952 // We need to add a pointer flag for each map that comes from the 7953 // same expression except for the first one. We also need to signal 7954 // this map is the first one that relates with the current capture 7955 // (there is a set of entries for each capture). 
7956 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7957 MapType, MapModifiers, MotionModifiers, IsImplicit, 7958 !IsExpressionFirstInfo || RequiresReference || 7959 FirstPointerInComplexData || IsMemberReference, 7960 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7961 7962 if (!IsExpressionFirstInfo || IsMemberReference) { 7963 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7964 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7965 if (IsPointer || (IsMemberReference && Next != CE)) 7966 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7967 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7968 7969 if (ShouldBeMemberOf) { 7970 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7971 // should be later updated with the correct value of MEMBER_OF. 7972 Flags |= OMP_MAP_MEMBER_OF; 7973 // From now on, all subsequent PTR_AND_OBJ entries should not be 7974 // marked as MEMBER_OF. 7975 ShouldBeMemberOf = false; 7976 } 7977 } 7978 7979 CombinedInfo.Types.push_back(Flags); 7980 } 7981 7982 // If we have encountered a member expression so far, keep track of the 7983 // mapped member. If the parent is "*this", then the value declaration 7984 // is nullptr. 
7985 if (EncounteredME) { 7986 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7987 unsigned FieldIndex = FD->getFieldIndex(); 7988 7989 // Update info about the lowest and highest elements for this struct 7990 if (!PartialStruct.Base.isValid()) { 7991 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7992 if (IsFinalArraySection) { 7993 Address HB = 7994 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7995 .getAddress(CGF); 7996 PartialStruct.HighestElem = {FieldIndex, HB}; 7997 } else { 7998 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7999 } 8000 PartialStruct.Base = BP; 8001 PartialStruct.LB = BP; 8002 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8003 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8004 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8005 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8006 } 8007 } 8008 8009 // Need to emit combined struct for array sections. 8010 if (IsFinalArraySection || IsNonContiguous) 8011 PartialStruct.IsArraySection = true; 8012 8013 // If we have a final array section, we are done with this expression. 8014 if (IsFinalArraySection) 8015 break; 8016 8017 // The pointer becomes the base for the next element. 8018 if (Next != CE) 8019 BP = IsMemberReference ? LowestElem : LB; 8020 8021 IsExpressionFirstInfo = false; 8022 IsCaptureFirstInfo = false; 8023 FirstPointerInComplexData = false; 8024 IsPrevMemberReference = IsMemberReference; 8025 } else if (FirstPointerInComplexData) { 8026 QualType Ty = Components.rbegin() 8027 ->getAssociatedDeclaration() 8028 ->getType() 8029 .getNonReferenceType(); 8030 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8031 FirstPointerInComplexData = false; 8032 } 8033 } 8034 // If ran into the whole component - allocate the space for the whole 8035 // record. 
8036 if (!EncounteredME) 8037 PartialStruct.HasCompleteRecord = true; 8038 8039 if (!IsNonContiguous) 8040 return; 8041 8042 const ASTContext &Context = CGF.getContext(); 8043 8044 // For supporting stride in array section, we need to initialize the first 8045 // dimension size as 1, first offset as 0, and first count as 1 8046 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8047 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8048 MapValuesArrayTy CurStrides; 8049 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8050 uint64_t ElementTypeSize; 8051 8052 // Collect Size information for each dimension and get the element size as 8053 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8054 // should be [10, 10] and the first stride is 4 btyes. 8055 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8056 Components) { 8057 const Expr *AssocExpr = Component.getAssociatedExpression(); 8058 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8059 8060 if (!OASE) 8061 continue; 8062 8063 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8064 auto *CAT = Context.getAsConstantArrayType(Ty); 8065 auto *VAT = Context.getAsVariableArrayType(Ty); 8066 8067 // We need all the dimension size except for the last dimension. 8068 assert((VAT || CAT || &Component == &*Components.begin()) && 8069 "Should be either ConstantArray or VariableArray if not the " 8070 "first Component"); 8071 8072 // Get element size if CurStrides is empty. 
8073 if (CurStrides.empty()) { 8074 const Type *ElementType = nullptr; 8075 if (CAT) 8076 ElementType = CAT->getElementType().getTypePtr(); 8077 else if (VAT) 8078 ElementType = VAT->getElementType().getTypePtr(); 8079 else 8080 assert(&Component == &*Components.begin() && 8081 "Only expect pointer (non CAT or VAT) when this is the " 8082 "first Component"); 8083 // If ElementType is null, then it means the base is a pointer 8084 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8085 // for next iteration. 8086 if (ElementType) { 8087 // For the case that having pointer as base, we need to remove one 8088 // level of indirection. 8089 if (&Component != &*Components.begin()) 8090 ElementType = ElementType->getPointeeOrArrayElementType(); 8091 ElementTypeSize = 8092 Context.getTypeSizeInChars(ElementType).getQuantity(); 8093 CurStrides.push_back( 8094 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8095 } 8096 } 8097 // Get dimension value except for the last dimension since we don't need 8098 // it. 8099 if (DimSizes.size() < Components.size() - 1) { 8100 if (CAT) 8101 DimSizes.push_back(llvm::ConstantInt::get( 8102 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8103 else if (VAT) 8104 DimSizes.push_back(CGF.Builder.CreateIntCast( 8105 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8106 /*IsSigned=*/false)); 8107 } 8108 } 8109 8110 // Skip the dummy dimension since we have already have its information. 8111 auto DI = DimSizes.begin() + 1; 8112 // Product of dimension. 8113 llvm::Value *DimProd = 8114 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8115 8116 // Collect info for non-contiguous. Notice that offset, count, and stride 8117 // are only meaningful for array-section, so we insert a null for anything 8118 // other than array-section. 8119 // Also, the size of offset, count, and stride are not the same as 8120 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8121 // count, and stride are the same as the number of non-contiguous 8122 // declaration in target update to/from clause. 8123 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8124 Components) { 8125 const Expr *AssocExpr = Component.getAssociatedExpression(); 8126 8127 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8128 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8129 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8130 /*isSigned=*/false); 8131 CurOffsets.push_back(Offset); 8132 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8133 CurStrides.push_back(CurStrides.back()); 8134 continue; 8135 } 8136 8137 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8138 8139 if (!OASE) 8140 continue; 8141 8142 // Offset 8143 const Expr *OffsetExpr = OASE->getLowerBound(); 8144 llvm::Value *Offset = nullptr; 8145 if (!OffsetExpr) { 8146 // If offset is absent, then we just set it to zero. 8147 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8148 } else { 8149 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8150 CGF.Int64Ty, 8151 /*isSigned=*/false); 8152 } 8153 CurOffsets.push_back(Offset); 8154 8155 // Count 8156 const Expr *CountExpr = OASE->getLength(); 8157 llvm::Value *Count = nullptr; 8158 if (!CountExpr) { 8159 // In Clang, once a high dimension is an array section, we construct all 8160 // the lower dimension as array section, however, for case like 8161 // arr[0:2][2], Clang construct the inner dimension as an array section 8162 // but it actually is not in an array section form according to spec. 8163 if (!OASE->getColonLocFirst().isValid() && 8164 !OASE->getColonLocSecond().isValid()) { 8165 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8166 } else { 8167 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8168 // When the length is absent it defaults to ⌈(size − 8169 // lower-bound)/stride⌉, where size is the size of the array 8170 // dimension. 8171 const Expr *StrideExpr = OASE->getStride(); 8172 llvm::Value *Stride = 8173 StrideExpr 8174 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8175 CGF.Int64Ty, /*isSigned=*/false) 8176 : nullptr; 8177 if (Stride) 8178 Count = CGF.Builder.CreateUDiv( 8179 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8180 else 8181 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8182 } 8183 } else { 8184 Count = CGF.EmitScalarExpr(CountExpr); 8185 } 8186 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8187 CurCounts.push_back(Count); 8188 8189 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8190 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8191 // Offset Count Stride 8192 // D0 0 1 4 (int) <- dummy dimension 8193 // D1 0 2 8 (2 * (1) * 4) 8194 // D2 1 2 20 (1 * (1 * 5) * 4) 8195 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8196 const Expr *StrideExpr = OASE->getStride(); 8197 llvm::Value *Stride = 8198 StrideExpr 8199 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8200 CGF.Int64Ty, /*isSigned=*/false) 8201 : nullptr; 8202 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8203 if (Stride) 8204 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8205 else 8206 CurStrides.push_back(DimProd); 8207 if (DI != DimSizes.end()) 8208 ++DI; 8209 } 8210 8211 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8212 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8213 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8214 } 8215 8216 /// Return the adjusted map modifiers if the declaration a capture refers to 8217 /// appears in a first-private clause. This is expected to be used only with 8218 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // A const variable captured by reference is only read on the device;
      // mark it 'always to' so the device copy is refreshed.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // Pointers get PTR_AND_OBJ so both pointer and pointee are handled.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not first-private: default tofrom mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Return the MEMBER_OF flag that encodes (one-based) \p Position in the
  /// high bits of the mapping-flags word.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder (all-ones field) in \p Flags with the
  /// real \p MemberOfFlag value. PTR_AND_OBJ entries that were never given the
  /// placeholder are deliberately left untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the fields of \p RD (recursing into non-virtual and virtual
  /// bases) into \p Layout, in LLVM-struct layout order. \p AsBase selects the
  /// base-subobject layout for recursive calls on base classes.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // (to recurse into) or a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A virtual base may share its slot with a non-virtual base already
      // recorded above; keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in layout order, expanding bases recursively.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Note: the enum order matters — per declaration, 'present' entries are
    // emitted first, then 'alloc', then everything else (the buckets below
    // are iterated in enum order). 'Total' is the bucket count.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect 'map' clause entries.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The map clause may be implicit (no source location); then there is
        // no user-written variable reference to record.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect 'to' motion-clause entries, treated as 'map(to)'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect 'from' motion-clause entries, treated as 'map(from)'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Only handle each declaration once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected entries, one declaration at a time, so that members
    // of the same struct are combined correctly.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  /// Constructor for executable directives: gathers firstprivate,
  /// uses_allocators, and is_device_ptr clause information up front.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // Nothing to combine for a single non-member, non-array-section entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the whole record is mapped, the combined entry covers it from LB.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Delegate to the clause-driven worker with the directive's clauses.
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only closure types (lambdas) are processed; anything else is ignored.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer (field of the closure) as PTR_AND_OBJ.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures (or captured pointers) need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map its loaded value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // The exact flag combination below is the marker emitted by
      // generateInfoForLambdaCaptures; any other entry is left untouched.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Search backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that refers to this capture.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Sort so that entries with the 'present' modifier or 'alloc' map type
    // come first.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      // NOTE(review): MapType is read from RHS here while MapModifiers comes
      // from LHS (and the R-suffixed pair below is mirrored) — this looks like
      // the operands are swapped for the alloc comparison; confirm against
      // upstream before relying on the alloc ordering.
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list; component lists are walked from the
      // base outwards (reverse iteration) to find a common prefix.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by declaration order of the diverging fields.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivates get a dedicated global copy on the device.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit the array of 'descriptor_dim' structs used by the runtime for
/// non-contiguous (strided) target data transfers and store its address into
/// the pointers array slot of each non-contiguous entry.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored innermost-first, hence the reversed index.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
// NOTE(review): this function is at namespace scope without 'static' unlike
// the neighbouring helpers — presumably file-local; confirm it is not
// referenced from another TU before adding internal linkage.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    // Prefer the pretty-printed map expression; fall back to the decl name.
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }
  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // Non-contiguous entries carry the dimension count in the size slot;
        // the real extents are emitted via the descriptor below.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size, and mapper arrays,
    // one slot per capture.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where device-pointer captures were stored so the caller can
      // later read back the translated addresses.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Intentionally non-explicit so callers can pass a bare bool/braced value.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9493 static void emitOffloadingArraysArgument(
9494 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9495 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9496 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9497 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9498 const ArgumentsOptions &Options = ArgumentsOptions()) {
9499 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9500 "expected region end call to runtime only when end call is separate");
9501 CodeGenModule &CGM = CGF.CGM;
9502 if (Info.NumberOfPtrs) {
9503 // Decay each array to a pointer to its first element (&array[0][0]).
9503 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9504 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9505 Info.BasePointersArray,
9506 /*Idx0=*/0, /*Idx1=*/0);
9507 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9508 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9509 Info.PointersArray,
9510 /*Idx0=*/0,
9511 /*Idx1=*/0);
9512 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9513 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9514 /*Idx0=*/0, /*Idx1=*/0);
9515 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9516 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9517 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9518 : Info.MapTypesArray,
9519 /*Idx0=*/0,
9520 /*Idx1=*/0);
9521
9522 // Only emit the map-names array if debug information is requested;
9523 // otherwise pass a null pointer to the runtime.
9524 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9525 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9526 else
9527 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9528 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9529 Info.MapNamesArray,
9530 /*Idx0=*/0,
9531 /*Idx1=*/0);
9532 // If there is no user-defined mapper, set the mapper array to nullptr to
9533 // avoid an unnecessary data privatization.
9534 if (!Info.HasMapper)
9535 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9536 else
9537 MappersArrayArg =
9538 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9539 } else {
9540 // No pointers to offload: every argument degenerates to a typed null.
9540 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9541 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9542 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9543 MapTypesArrayArg =
9544 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9545 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9546 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9547 }
9548 }
9549
9550 /// Check for inner distribute directive.
9551 static const OMPExecutableDirective *
9552 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9553 const auto *CS = D.getInnermostCapturedStmt();
9554 const auto *Body =
9555 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9556 const Stmt *ChildStmt =
9557 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9558
9559 if (const auto *NestedDir =
9560 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9561 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9562 switch (D.getDirectiveKind()) { // switch on the OUTER directive; DKind is the nested one
9563 case OMPD_target:
9564 if (isOpenMPDistributeDirective(DKind))
9565 return NestedDir;
9566 if (DKind == OMPD_teams) { // 'target' / 'teams': look one level deeper for 'distribute'
9567 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9568 /*IgnoreCaptured=*/true);
9569 if (!Body)
9570 return nullptr;
9571 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9572 if (const auto *NND =
9573 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9574 DKind = NND->getDirectiveKind();
9575 if (isOpenMPDistributeDirective(DKind))
9576 return NND;
9577 }
9578 }
9579 return nullptr;
9580 case OMPD_target_teams:
9581 if (isOpenMPDistributeDirective(DKind))
9582 return NestedDir;
9583 return nullptr;
9584 case OMPD_target_parallel: // target forms that cannot enclose 'distribute'
9585 case OMPD_target_simd:
9586 case OMPD_target_parallel_for:
9587 case OMPD_target_parallel_for_simd:
9588 return nullptr;
9589 case OMPD_target_teams_distribute: // all remaining kinds are invalid outer
9590 case OMPD_target_teams_distribute_simd: // directives for this query
9591 case OMPD_target_teams_distribute_parallel_for:
9592 case OMPD_target_teams_distribute_parallel_for_simd:
9593 case OMPD_parallel:
9594 case OMPD_for:
9595 case OMPD_parallel_for:
9596 case OMPD_parallel_master:
9597 case OMPD_parallel_sections:
9598 case OMPD_for_simd:
9599 case OMPD_parallel_for_simd:
9600 case OMPD_cancel:
9601 case OMPD_cancellation_point:
9602 case OMPD_ordered:
9603 case OMPD_threadprivate:
9604 case OMPD_allocate:
9605 case OMPD_task:
9606 case OMPD_simd:
9607 case OMPD_tile:
9608 case OMPD_unroll:
9609 case OMPD_sections:
9610 case OMPD_section:
9611 case OMPD_single:
9612 case OMPD_master:
9613 case OMPD_critical:
9614 case OMPD_taskyield:
9615 case OMPD_barrier:
9616 case OMPD_taskwait:
9617 case OMPD_taskgroup:
9618 case OMPD_atomic:
9619 case OMPD_flush:
9620 case OMPD_depobj:
9621 case OMPD_scan:
9622 case OMPD_teams:
9623 case OMPD_target_data:
9624 case OMPD_target_exit_data:
9625 case OMPD_target_enter_data:
9626 case OMPD_distribute:
9627 case OMPD_distribute_simd:
9628 case OMPD_distribute_parallel_for:
9629 case OMPD_distribute_parallel_for_simd:
9630 case OMPD_teams_distribute:
9631 case OMPD_teams_distribute_simd:
9632 case OMPD_teams_distribute_parallel_for:
9633 case OMPD_teams_distribute_parallel_for_simd:
9634 case OMPD_target_update:
9635 case OMPD_declare_simd:
9636 case OMPD_declare_variant:
9637 case OMPD_begin_declare_variant:
9638 case OMPD_end_declare_variant:
9639 case OMPD_declare_target:
9640 case OMPD_end_declare_target:
9641 case OMPD_declare_reduction:
9642 case OMPD_declare_mapper:
9643 case OMPD_taskloop:
9644 case OMPD_taskloop_simd:
9645 case OMPD_master_taskloop:
9646 case OMPD_master_taskloop_simd:
9647 case OMPD_parallel_master_taskloop:
9648 case OMPD_parallel_master_taskloop_simd:
9649 case OMPD_requires:
9650 case OMPD_unknown:
9651 default:
9652 llvm_unreachable("Unexpected directive.");
9653 }
9654 }
9655
9656 return nullptr;
9657 }
9658
9659 /// Emit the user-defined mapper function. The code generation follows the
9660 /// pattern in the example below.
9661 /// \code
9662 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9663 /// void *base, void *begin,
9664 /// int64_t size, int64_t type,
9665 /// void *name = nullptr) {
9666 /// // Allocate space for an array section first or add a base/begin for
9667 /// // pointer dereference.
9668 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9669 /// !maptype.IsDelete)
9670 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9671 /// size*sizeof(Ty), clearToFromMember(type));
9672 /// // Map members.
9673 /// for (unsigned i = 0; i < size; i++) {
9674 /// // For each component specified by this mapper:
9675 /// for (auto c : begin[i]->all_components) {
9676 /// if (c.hasMapper())
9677 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9678 /// c.arg_type, c.arg_name);
9679 /// else
9680 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9681 /// c.arg_begin, c.arg_size, c.arg_type,
9682 /// c.arg_name);
9683 /// }
9684 /// }
9685 /// // Delete the array section.
9686 /// if (size > 1 && maptype.IsDelete)
9687 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9688 /// size*sizeof(Ty), clearToFromMember(type));
9689 /// }
9690 /// \endcode
9691 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9692 CodeGenFunction *CGF) {
9693 if (UDMMap.count(D) > 0) // already emitted for this declaration
9694 return;
9695 ASTContext &C = CGM.getContext();
9696 QualType Ty = D->getType();
9697 QualType PtrTy = C.getPointerType(Ty).withRestrict(); // elements don't alias
9698 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9699 auto *MapperVarDecl =
9700 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9701 SourceLocation Loc = D->getLocation();
9702 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9703
9704 // Prepare mapper function arguments and attributes.
9705 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9706 C.VoidPtrTy, ImplicitParamDecl::Other);
9707 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9708 ImplicitParamDecl::Other);
9709 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9710 C.VoidPtrTy, ImplicitParamDecl::Other);
9711 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9712 ImplicitParamDecl::Other);
9713 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9714 ImplicitParamDecl::Other);
9715 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9716 ImplicitParamDecl::Other);
9717 FunctionArgList Args;
9718 Args.push_back(&HandleArg);
9719 Args.push_back(&BaseArg);
9720 Args.push_back(&BeginArg);
9721 Args.push_back(&SizeArg);
9722 Args.push_back(&TypeArg);
9723 Args.push_back(&NameArg);
9724 const CGFunctionInfo &FnInfo =
9725 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9726 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9727 SmallString<64> TyStr;
9728 llvm::raw_svector_ostream Out(TyStr);
9729 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9730 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9731 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9732 Name, &CGM.getModule());
9733 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9734 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); // permit optimizing mappers
9735 // Start the mapper function code generation.
9736 CodeGenFunction MapperCGF(CGM);
9737 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9738 // Compute the starting and end addresses of array elements.
9739 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9740 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9741 C.getPointerType(Int64Ty), Loc);
9742 // Prepare common arguments for array initiation and deletion.
9743 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9744 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9745 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9746 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9747 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9748 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9749 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9750 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9751 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9752 // Convert the size in bytes into the number of array elements.
9753 Size = MapperCGF.Builder.CreateExactUDiv( // size arg is in bytes
9754 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9755 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9756 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9757 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9758 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9759 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9760 C.getPointerType(Int64Ty), Loc);
9761 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9762 MapperCGF.GetAddrOfLocalVar(&NameArg),
9763 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9764
9765 // Emit array initiation if this is an array section and \p MapType indicates
9766 // that memory allocation is required.
9767 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9768 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9769 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9770
9771 // Emit a for loop to iterate through SizeArg of elements and map all of them.
9772
9773 // Emit the loop header block.
9774 MapperCGF.EmitBlock(HeadBB);
9775 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9776 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9777 // Evaluate whether the initial condition is satisfied.
9778 llvm::Value *IsEmpty =
9779 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9780 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9781 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9782
9783 // Emit the loop body block.
9784 MapperCGF.EmitBlock(BodyBB);
9785 llvm::BasicBlock *LastBB = BodyBB; // predecessor for the loop back-edge PHI
9786 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9787 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9788 PtrPHI->addIncoming(PtrBegin, EntryBB);
9789 Address PtrCurrent =
9790 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9791 .getAlignment()
9792 .alignmentOfArrayElement(ElementSize));
9793 // Privatize the declared variable of mapper to be the current array element.
9794 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9795 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9796 (void)Scope.Privatize();
9797
9798 // Get map clause information. Fill up the arrays with all mapped variables.
9799 MappableExprsHandler::MapCombinedInfoTy Info;
9800 MappableExprsHandler MEHandler(*D, MapperCGF);
9801 MEHandler.generateAllInfoForMapper(Info);
9802
9803 // Call the runtime API __tgt_mapper_num_components to get the number of
9804 // pre-existing components.
9805 llvm::Value *OffloadingArgs[] = {Handle};
9806 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9807 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9808 OMPRTL___tgt_mapper_num_components),
9809 OffloadingArgs);
9810 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9811 PreviousSize, // shift into the MEMBER_OF bit-field of the map-type word
9812 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9813
9814 // Fill up the runtime mapper handle for all components.
9815 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9816 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9817 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9818 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9819 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9820 llvm::Value *CurSizeArg = Info.Sizes[I];
9821 llvm::Value *CurNameArg = // names are only materialized with debug info
9822 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9823 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9824 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9825
9826 // Extract the MEMBER_OF field from the map type.
9827 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9828 llvm::Value *MemberMapType =
9829 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9830
9831 // Combine the map type inherited from user-defined mapper with that
9832 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9833 // bits of the \a MapType, which is the input argument of the mapper
9834 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9835 // bits of MemberMapType.
9836 // [OpenMP 5.0], 1.2.6. map-type decay.
9837 // | alloc | to | from | tofrom | release | delete
9838 // ----------------------------------------------------------
9839 // alloc | alloc | alloc | alloc | alloc | release | delete
9840 // to | alloc | to | alloc | to | release | delete
9841 // from | alloc | alloc | from | from | release | delete
9842 // tofrom | alloc | to | from | tofrom | release | delete
9843 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9844 MapType,
9845 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9846 MappableExprsHandler::OMP_MAP_FROM));
9847 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9848 llvm::BasicBlock *AllocElseBB =
9849 MapperCGF.createBasicBlock("omp.type.alloc.else");
9850 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9851 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9852 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9853 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9854 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9855 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9856 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9857 MapperCGF.EmitBlock(AllocBB);
9858 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9859 MemberMapType,
9860 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9861 MappableExprsHandler::OMP_MAP_FROM)));
9862 MapperCGF.Builder.CreateBr(EndBB);
9863 MapperCGF.EmitBlock(AllocElseBB);
9864 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9865 LeftToFrom,
9866 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9867 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9868 // In case of to, clear OMP_MAP_FROM.
9869 MapperCGF.EmitBlock(ToBB);
9870 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9871 MemberMapType,
9872 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9873 MapperCGF.Builder.CreateBr(EndBB);
9874 MapperCGF.EmitBlock(ToElseBB);
9875 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9876 LeftToFrom,
9877 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9878 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9879 // In case of from, clear OMP_MAP_TO.
9880 MapperCGF.EmitBlock(FromBB);
9881 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9882 MemberMapType,
9883 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9884 // In case of tofrom, do nothing.
9885 MapperCGF.EmitBlock(EndBB);
9886 LastBB = EndBB;
9887 llvm::PHINode *CurMapType =
9888 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9889 CurMapType->addIncoming(AllocMapType, AllocBB);
9890 CurMapType->addIncoming(ToMapType, ToBB);
9891 CurMapType->addIncoming(FromMapType, FromBB);
9892 CurMapType->addIncoming(MemberMapType, ToElseBB); // tofrom edge: unchanged
9893
9894 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9895 CurSizeArg, CurMapType, CurNameArg};
9896 if (Info.Mappers[I]) {
9897 // Call the corresponding mapper function.
9898 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9899 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9900 assert(MapperFunc && "Expect a valid mapper function is available.");
9901 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9902 } else {
9903 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9904 // data structure.
9905 MapperCGF.EmitRuntimeCall(
9906 OMPBuilder.getOrCreateRuntimeFunction(
9907 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9908 OffloadingArgs);
9909 }
9910 }
9911
9912 // Update the pointer to point to the next element that needs to be mapped,
9913 // and check whether we have mapped all elements.
9914 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9915 PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9916 PtrPHI->addIncoming(PtrNext, LastBB);
9917 llvm::Value *IsDone =
9918 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9919 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9920 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9921
9922 MapperCGF.EmitBlock(ExitBB);
9923 // Emit array deletion if this is an array section and \p MapType indicates
9924 // that deletion is required.
9925 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9926 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9927
9928 // Emit the function exit block.
9929 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9930 MapperCGF.FinishFunction();
9931 UDMMap.try_emplace(D, Fn);
9932 if (CGF) { // remember this decl for the currently emitted function
9933 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9934 Decls.second.push_back(D);
9935 }
9936 }
9937
9938 /// Emit the array initialization or deletion portion for user-defined mapper
9939 /// code generation. First, it evaluates whether an array section is mapped and
9940 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9941 /// true, and \a MapType indicates to not delete this array, array
9942 /// initialization code is generated. If \a IsInit is false, and \a MapType
9943 /// indicates to delete this array, array deletion code is generated.
9944 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9945 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9946 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9947 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9948 bool IsInit) {
9949 StringRef Prefix = IsInit ? ".init" : ".del";
9950
9951 // Evaluate if this is an array section.
9952 llvm::BasicBlock *BodyBB =
9953 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9954 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9955 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9956 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9957 MapType,
9958 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9959 llvm::Value *DeleteCond;
9960 llvm::Value *Cond;
9961 if (IsInit) {
9962 // base != begin?
9963 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9964 MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9965 // IsPtrAndObj?
9966 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9967 MapType,
9968 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9969 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9970 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9971 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9972 DeleteCond = MapperCGF.Builder.CreateIsNull( // init only if delete bit unset
9973 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9974 } else {
9975 Cond = IsArray;
9976 DeleteCond = MapperCGF.Builder.CreateIsNotNull( // delete only if bit is set
9977 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9978 }
9979 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9980 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9981
9982 MapperCGF.EmitBlock(BodyBB);
9983 // Get the array size by multiplying element size and element number (i.e., \p
9984 // Size).
9985 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9986 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9987 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9988 // memory allocation/deletion purpose only.
9989 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9990 MapType,
9991 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9992 MappableExprsHandler::OMP_MAP_FROM)));
9993 MapTypeArg = MapperCGF.Builder.CreateOr(
9994 MapTypeArg, // mark this runtime-generated entry as implicit
9995 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
9996
9997 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9998 // data structure.
9999 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
10000 ArraySize, MapTypeArg, MapName};
10001 MapperCGF.EmitRuntimeCall(
10002 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10003 OMPRTL___tgt_push_mapper_component),
10004 OffloadingArgs);
10005 }
10006
10007 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10008 const OMPDeclareMapperDecl *D) {
10009 auto I = UDMMap.find(D); // return the cached mapper if already emitted
10010 if (I != UDMMap.end())
10011 return I->second;
10012 emitUserDefinedMapper(D); // emits the function and records it in UDMMap
10013 return UDMMap.lookup(D);
10014 }
10015
10016 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10017 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10018 llvm::Value *DeviceID,
10019 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10020 const OMPLoopDirective &D)>
10021 SizeEmitter) {
10022 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10023 const OMPExecutableDirective *TD = &D;
10024 // Get nested teams distribute kind directive, if any.
10025 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10026 TD = getNestedDistributeDirective(CGM.getContext(), D); // search nested
10027 if (!TD)
10028 return;
10029 const auto *LD = cast<OMPLoopDirective>(TD);
10030 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10031 PrePostActionTy &) {
10032 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10033 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10034 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10035 CGF.EmitRuntimeCall(
10036 OMPBuilder.getOrCreateRuntimeFunction(
10037 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10038 Args);
10039 }
10040 };
10041 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10042 }
10043
10044 void CGOpenMPRuntime::emitTargetCall(
10045 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10046 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10047 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10048 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10049 const OMPLoopDirective &D)>
10050 SizeEmitter) {
10051 if (!CGF.HaveInsertPoint())
10052 return;
10053
10054 assert(OutlinedFn && "Invalid outlined function!");
10055
10056 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10057 D.hasClausesOfKind<OMPNowaitClause>();
10058 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10059 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10060 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10061 PrePostActionTy &) {
10062 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10063 };
10064 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10065
10066 CodeGenFunction::OMPTargetDataInfo InputInfo;
10067 llvm::Value *MapTypesArray = nullptr;
10068 llvm::Value *MapNamesArray = nullptr;
10069 // Fill up the pointer arrays and transfer execution to the device.
10070 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10071 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10072 &CapturedVars, 10073 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10074 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10075 // Reverse offloading is not supported, so just execute on the host. 10076 if (RequiresOuterTask) { 10077 CapturedVars.clear(); 10078 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10079 } 10080 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10081 return; 10082 } 10083 10084 // On top of the arrays that were filled up, the target offloading call 10085 // takes as arguments the device id as well as the host pointer. The host 10086 // pointer is used by the runtime library to identify the current target 10087 // region, so it only has to be unique and not necessarily point to 10088 // anything. It could be the pointer to the outlined function that 10089 // implements the target region, but we aren't using that so that the 10090 // compiler doesn't need to keep that, and could therefore inline the host 10091 // function if proven worthwhile during optimization. 10092 10093 // From this point on, we need to have an ID of the target region defined. 10094 assert(OutlinedFnID && "Invalid outlined function ID!"); 10095 10096 // Emit device ID if any. 10097 llvm::Value *DeviceID; 10098 if (Device.getPointer()) { 10099 assert((Device.getInt() == OMPC_DEVICE_unknown || 10100 Device.getInt() == OMPC_DEVICE_device_num) && 10101 "Expected device_num modifier."); 10102 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10103 DeviceID = 10104 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10105 } else { 10106 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10107 } 10108 10109 // Emit the number of elements in the offloading arrays. 
10110 llvm::Value *PointerNum = 10111 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10112 10113 // Return value of the runtime offloading call. 10114 llvm::Value *Return; 10115 10116 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10117 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10118 10119 // Source location for the ident struct 10120 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10121 10122 // Emit tripcount for the target loop-based directive. 10123 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10124 10125 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10126 // The target region is an outlined function launched by the runtime 10127 // via calls __tgt_target() or __tgt_target_teams(). 10128 // 10129 // __tgt_target() launches a target region with one team and one thread, 10130 // executing a serial region. This master thread may in turn launch 10131 // more threads within its team upon encountering a parallel region, 10132 // however, no additional teams can be launched on the device. 10133 // 10134 // __tgt_target_teams() launches a target region with one or more teams, 10135 // each with one or more threads. This call is required for target 10136 // constructs such as: 10137 // 'target teams' 10138 // 'target' / 'teams' 10139 // 'target teams distribute parallel for' 10140 // 'target parallel' 10141 // and so on. 10142 // 10143 // Note that on the host and CPU targets, the runtime implementation of 10144 // these calls simply call the outlined function without forking threads. 10145 // The outlined functions themselves have runtime calls to 10146 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10147 // the compiler in emitTeamsCall() and emitParallelCall(). 
10148 // 10149 // In contrast, on the NVPTX target, the implementation of 10150 // __tgt_target_teams() launches a GPU kernel with the requested number 10151 // of teams and threads so no additional calls to the runtime are required. 10152 if (NumTeams) { 10153 // If we have NumTeams defined this means that we have an enclosed teams 10154 // region. Therefore we also expect to have NumThreads defined. These two 10155 // values should be defined in the presence of a teams directive, 10156 // regardless of having any clauses associated. If the user is using teams 10157 // but no clauses, these two values will be the default that should be 10158 // passed to the runtime library - a 32-bit integer with the value zero. 10159 assert(NumThreads && "Thread limit expression should be available along " 10160 "with number of teams."); 10161 llvm::Value *OffloadingArgs[] = {RTLoc, 10162 DeviceID, 10163 OutlinedFnID, 10164 PointerNum, 10165 InputInfo.BasePointersArray.getPointer(), 10166 InputInfo.PointersArray.getPointer(), 10167 InputInfo.SizesArray.getPointer(), 10168 MapTypesArray, 10169 MapNamesArray, 10170 InputInfo.MappersArray.getPointer(), 10171 NumTeams, 10172 NumThreads}; 10173 Return = CGF.EmitRuntimeCall( 10174 OMPBuilder.getOrCreateRuntimeFunction( 10175 CGM.getModule(), HasNowait 10176 ? OMPRTL___tgt_target_teams_nowait_mapper 10177 : OMPRTL___tgt_target_teams_mapper), 10178 OffloadingArgs); 10179 } else { 10180 llvm::Value *OffloadingArgs[] = {RTLoc, 10181 DeviceID, 10182 OutlinedFnID, 10183 PointerNum, 10184 InputInfo.BasePointersArray.getPointer(), 10185 InputInfo.PointersArray.getPointer(), 10186 InputInfo.SizesArray.getPointer(), 10187 MapTypesArray, 10188 MapNamesArray, 10189 InputInfo.MappersArray.getPointer()}; 10190 Return = CGF.EmitRuntimeCall( 10191 OMPBuilder.getOrCreateRuntimeFunction( 10192 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10193 : OMPRTL___tgt_target_mapper), 10194 OffloadingArgs); 10195 } 10196 10197 // Check the error code and execute the host version if required. 10198 llvm::BasicBlock *OffloadFailedBlock = 10199 CGF.createBasicBlock("omp_offload.failed"); 10200 llvm::BasicBlock *OffloadContBlock = 10201 CGF.createBasicBlock("omp_offload.cont"); 10202 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10203 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10204 10205 CGF.EmitBlock(OffloadFailedBlock); 10206 if (RequiresOuterTask) { 10207 CapturedVars.clear(); 10208 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10209 } 10210 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10211 CGF.EmitBranch(OffloadContBlock); 10212 10213 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10214 }; 10215 10216 // Notify that the host version must be executed. 10217 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10218 RequiresOuterTask](CodeGenFunction &CGF, 10219 PrePostActionTy &) { 10220 if (RequiresOuterTask) { 10221 CapturedVars.clear(); 10222 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10223 } 10224 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10225 }; 10226 10227 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10228 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10229 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10230 // Fill up the arrays with all the captured variables. 10231 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10232 10233 // Get mappable expression information. 
10234 MappableExprsHandler MEHandler(D, CGF); 10235 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10236 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10237 10238 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10239 auto *CV = CapturedVars.begin(); 10240 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10241 CE = CS.capture_end(); 10242 CI != CE; ++CI, ++RI, ++CV) { 10243 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10244 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10245 10246 // VLA sizes are passed to the outlined region by copy and do not have map 10247 // information associated. 10248 if (CI->capturesVariableArrayType()) { 10249 CurInfo.Exprs.push_back(nullptr); 10250 CurInfo.BasePointers.push_back(*CV); 10251 CurInfo.Pointers.push_back(*CV); 10252 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10253 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10254 // Copy to the device as an argument. No need to retrieve it. 10255 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10256 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10257 MappableExprsHandler::OMP_MAP_IMPLICIT); 10258 CurInfo.Mappers.push_back(nullptr); 10259 } else { 10260 // If we have any information in the map clause, we use it, otherwise we 10261 // just do a default mapping. 10262 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10263 if (!CI->capturesThis()) 10264 MappedVarSet.insert(CI->getCapturedVar()); 10265 else 10266 MappedVarSet.insert(nullptr); 10267 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10268 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10269 // Generate correct mapping for variables captured by reference in 10270 // lambdas. 
10271 if (CI->capturesVariable()) 10272 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10273 CurInfo, LambdaPointers); 10274 } 10275 // We expect to have at least an element of information for this capture. 10276 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10277 "Non-existing map pointer for capture!"); 10278 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10279 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10280 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10281 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10282 "Inconsistent map information sizes!"); 10283 10284 // If there is an entry in PartialStruct it means we have a struct with 10285 // individual members mapped. Emit an extra combined entry. 10286 if (PartialStruct.Base.isValid()) { 10287 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10288 MEHandler.emitCombinedEntry( 10289 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10290 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10291 } 10292 10293 // We need to append the results of this capture to what we already have. 10294 CombinedInfo.append(CurInfo); 10295 } 10296 // Adjust MEMBER_OF flags for the lambdas captures. 10297 MEHandler.adjustMemberOfForLambdaCaptures( 10298 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10299 CombinedInfo.Types); 10300 // Map any list items in a map clause that were not captures because they 10301 // weren't referenced within the construct. 10302 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10303 10304 TargetDataInfo Info; 10305 // Fill up the arrays and create the arguments. 
10306 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10307 emitOffloadingArraysArgument( 10308 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10309 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10310 {/*ForEndTask=*/false}); 10311 10312 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10313 InputInfo.BasePointersArray = 10314 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10315 InputInfo.PointersArray = 10316 Address(Info.PointersArray, CGM.getPointerAlign()); 10317 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10318 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10319 MapTypesArray = Info.MapTypesArray; 10320 MapNamesArray = Info.MapNamesArray; 10321 if (RequiresOuterTask) 10322 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10323 else 10324 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10325 }; 10326 10327 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10328 CodeGenFunction &CGF, PrePostActionTy &) { 10329 if (RequiresOuterTask) { 10330 CodeGenFunction::OMPTargetDataInfo InputInfo; 10331 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10332 } else { 10333 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10334 } 10335 }; 10336 10337 // If we have a target function ID it means that we need to support 10338 // offloading, otherwise, just execute on the host. We need to execute on host 10339 // regardless of the conditional in the if clause if, e.g., the user do not 10340 // specify target triples. 
  // Tail of emitTargetCall: choose between the offloading path and the host
  // fallback. A valid OutlinedFnID means device code was produced for this
  // region; without it (e.g. no target triples specified) we must always run
  // the host version.
  if (OutlinedFnID) {
    if (IfCond) {
      // An 'if' clause is present: emit a runtime conditional that selects
      // between offloading and host execution.
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan statement \p S for OpenMP target directives and emit the
/// corresponding device functions. \p ParentName is the mangled name of the
/// enclosing host function; together with the source position it forms the
/// unique offload-entry name shared between host and device compilations.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // Compute the (device-id, file-id, line) triple that uniquely identifies
    // this target region across the whole compilation.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch on the exact (combined) directive kind; each case emits the
    // outlined device function for that construct.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
                 *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target executable directive: recurse into the raw associated
    // statement looking for nested target regions.
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if \p VD carries a device_type clause that excludes it from
/// the current (host or device) compilation, so no code should be emitted
/// for it here.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function body so their device
  // entry points get emitted even when the function itself is not.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target and was
  // not already emitted as a target region host fallback.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
10570 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10571 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10572 cast<VarDecl>(GD.getDecl())); 10573 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10574 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10575 HasRequiresUnifiedSharedMemory)) { 10576 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10577 return true; 10578 } 10579 return false; 10580 } 10581 10582 llvm::Constant * 10583 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10584 const VarDecl *VD) { 10585 assert(VD->getType().isConstant(CGM.getContext()) && 10586 "Expected constant variable."); 10587 StringRef VarName; 10588 llvm::Constant *Addr; 10589 llvm::GlobalValue::LinkageTypes Linkage; 10590 QualType Ty = VD->getType(); 10591 SmallString<128> Buffer; 10592 { 10593 unsigned DeviceID; 10594 unsigned FileID; 10595 unsigned Line; 10596 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10597 FileID, Line); 10598 llvm::raw_svector_ostream OS(Buffer); 10599 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10600 << llvm::format("_%x_", FileID); 10601 if (CGM.getLangOpts().CPlusPlus) { 10602 CGM.getCXXABI().getMangleContext().mangleTypeName(VD->getType(), OS); 10603 OS << "_"; 10604 } 10605 OS << VD->getName() << "_l" << Line; 10606 VarName = OS.str(); 10607 } 10608 Linkage = llvm::GlobalValue::InternalLinkage; 10609 Addr = 10610 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10611 getDefaultFirstprivateAddressSpace()); 10612 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10613 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10614 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10615 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10616 VarName, Addr, VarSize, 10617 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10618 return Addr; 10619 } 10620 10621 void 
CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                              llvm::Constant *Addr) {
  // Nothing to register when neither offloading targets nor device
  // compilation are involved.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // declare target to (without unified memory): register the variable
    // itself as the entry.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size zero marks it as not defined in this TU.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // declare target link, or 'to' with unified shared memory: register a
    // pointer-sized reference entry instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Functions and declare-reduction decls go through the function path;
  // everything else is treated as a global variable.
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emit the declare target variables whose emission was postponed by
  // emitTargetGlobalVariable().
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      // Plain 'to' variable: emit it normally now.
      CGM.EmitGlobal(VD);
    } else {
      // 'link', or 'to' with unified shared memory: only materialize the
      // reference pointer for the variable.
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Default implementation is a no-op; device-specific runtimes override it.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  // Record the effects of a '#pragma omp requires' directive: unified shared
  // memory and the default memory ordering for atomic constructs.
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  // Returns true (and sets \p AS) when \p VD carries an 'omp allocate'
  // attribute with a predefined allocator.
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // Temporarily disable implicit declare-target marking during device
  // compilation; the previous state is restored by the destructor.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // The function has a body but was not recorded as emitted yet: it is
      // "done" only if an actual definition already exists in the module.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Not declare target: mark it now; 'second' is false when it was already
  // in the set (i.e. already emitted).
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the constructor-like function that calls
/// __tgt_register_requires(flags) so the runtime can check that 'requires'
/// clauses are consistent across translation units. Returns nullptr when no
/// registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Cleanups must run before the fork call returns.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Absent clauses are encoded as 0 (runtime default).
  llvm::Value *NumTeamsVal =
      NumTeams
          ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10905 CGF.CGM.Int32Ty, /* isSigned = */ true) 10906 : CGF.Builder.getInt32(0); 10907 10908 llvm::Value *ThreadLimitVal = 10909 ThreadLimit 10910 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10911 CGF.CGM.Int32Ty, /* isSigned = */ true) 10912 : CGF.Builder.getInt32(0); 10913 10914 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10915 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10916 ThreadLimitVal}; 10917 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10918 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10919 PushNumTeamsArgs); 10920 } 10921 10922 void CGOpenMPRuntime::emitTargetDataCalls( 10923 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10924 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10925 if (!CGF.HaveInsertPoint()) 10926 return; 10927 10928 // Action used to replace the default codegen action and turn privatization 10929 // off. 10930 PrePostActionTy NoPrivAction; 10931 10932 // Generate the code for the opening of the data environment. Capture all the 10933 // arguments of the runtime call by reference because they are used in the 10934 // closing of the region. 10935 auto &&BeginThenGen = [this, &D, Device, &Info, 10936 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10937 // Fill up the arrays with all the mapped variables. 10938 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10939 10940 // Get map clause information. 10941 MappableExprsHandler MEHandler(D, CGF); 10942 MEHandler.generateAllInfo(CombinedInfo); 10943 10944 // Fill up the arrays and create the arguments. 
10945 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10946 /*IsNonContiguous=*/true); 10947 10948 llvm::Value *BasePointersArrayArg = nullptr; 10949 llvm::Value *PointersArrayArg = nullptr; 10950 llvm::Value *SizesArrayArg = nullptr; 10951 llvm::Value *MapTypesArrayArg = nullptr; 10952 llvm::Value *MapNamesArrayArg = nullptr; 10953 llvm::Value *MappersArrayArg = nullptr; 10954 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10955 SizesArrayArg, MapTypesArrayArg, 10956 MapNamesArrayArg, MappersArrayArg, Info); 10957 10958 // Emit device ID if any. 10959 llvm::Value *DeviceID = nullptr; 10960 if (Device) { 10961 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10962 CGF.Int64Ty, /*isSigned=*/true); 10963 } else { 10964 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10965 } 10966 10967 // Emit the number of elements in the offloading arrays. 10968 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10969 // 10970 // Source location for the ident struct 10971 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10972 10973 llvm::Value *OffloadingArgs[] = {RTLoc, 10974 DeviceID, 10975 PointerNum, 10976 BasePointersArrayArg, 10977 PointersArrayArg, 10978 SizesArrayArg, 10979 MapTypesArrayArg, 10980 MapNamesArrayArg, 10981 MappersArrayArg}; 10982 CGF.EmitRuntimeCall( 10983 OMPBuilder.getOrCreateRuntimeFunction( 10984 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10985 OffloadingArgs); 10986 10987 // If device pointer privatization is required, emit the body of the region 10988 // here. It will have to be duplicated: with and without privatization. 10989 if (!Info.CaptureDeviceAddrMap.empty()) 10990 CodeGen(CGF); 10991 }; 10992 10993 // Generate code for the closing of the data region. 
10994 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10995 PrePostActionTy &) { 10996 assert(Info.isValid() && "Invalid data environment closing arguments."); 10997 10998 llvm::Value *BasePointersArrayArg = nullptr; 10999 llvm::Value *PointersArrayArg = nullptr; 11000 llvm::Value *SizesArrayArg = nullptr; 11001 llvm::Value *MapTypesArrayArg = nullptr; 11002 llvm::Value *MapNamesArrayArg = nullptr; 11003 llvm::Value *MappersArrayArg = nullptr; 11004 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11005 SizesArrayArg, MapTypesArrayArg, 11006 MapNamesArrayArg, MappersArrayArg, Info, 11007 {/*ForEndCall=*/true}); 11008 11009 // Emit device ID if any. 11010 llvm::Value *DeviceID = nullptr; 11011 if (Device) { 11012 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11013 CGF.Int64Ty, /*isSigned=*/true); 11014 } else { 11015 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11016 } 11017 11018 // Emit the number of elements in the offloading arrays. 11019 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11020 11021 // Source location for the ident struct 11022 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11023 11024 llvm::Value *OffloadingArgs[] = {RTLoc, 11025 DeviceID, 11026 PointerNum, 11027 BasePointersArrayArg, 11028 PointersArrayArg, 11029 SizesArrayArg, 11030 MapTypesArrayArg, 11031 MapNamesArrayArg, 11032 MappersArrayArg}; 11033 CGF.EmitRuntimeCall( 11034 OMPBuilder.getOrCreateRuntimeFunction( 11035 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11036 OffloadingArgs); 11037 }; 11038 11039 // If we need device pointer privatization, we need to emit the body of the 11040 // region with no privatization in the 'else' branch of the conditional. 11041 // Otherwise, we don't have to do anything. 
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, guarded by the if clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, again guarded by the if clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit a standalone target data directive ('target enter data',
/// 'target exit data' or 'target update'): materialize the offloading
/// arrays from the map clauses and call the matching
/// __tgt_target_data_{begin,end,update}[_nowait]_mapper entry point,
/// wrapped in an outer task when a 'depend' or 'nowait' clause requires one.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds cannot reach this function (see the assert
    // above); the exhaustive list keeps -Wswitch diagnostics useful when a
    // new directive kind is added to the enum.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays to the surrounding scope so ThenGen (possibly run
    // from inside a generated task) can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // No runtime call is emitted at all when the if clause evaluates to false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size in bits of the "characteristic data type" (CDT) of \p FD,
/// used to derive the vector length when no 'simdlen' clause is given.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For instance methods the implicit 'this' occupies parameter slot 0.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // First non-uniform, non-linear (i.e. Vector) parameter, rule b).
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Rule c)/d): aggregates fall back to int.
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Add the x86 vector-variant mangled names
/// ("_ZGV<isa><mask><vlen><params>_<name>") as function attributes on \p Fn,
/// one per ISA (SSE/AVX/AVX2/AVX512) and mask variant.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // 'N' = not-in-branch (unmasked), 'M' = in-branch (masked) variants.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No 'simdlen' clause: derive VLEN from the characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // One mangling character per parameter, optionally followed by stride
      // and alignment info.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11403 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11404 QT = QT.getCanonicalType(); 11405 11406 if (QT->isVoidType()) 11407 return false; 11408 11409 if (Kind == ParamKindTy::Uniform) 11410 return false; 11411 11412 if (Kind == ParamKindTy::Linear) 11413 return false; 11414 11415 // TODO: Handle linear references with modifiers 11416 11417 if (Kind == ParamKindTy::LinearWithVarStride) 11418 return false; 11419 11420 return true; 11421 } 11422 11423 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11424 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11425 QT = QT.getCanonicalType(); 11426 unsigned Size = C.getTypeSize(QT); 11427 11428 // Only scalars and complex within 16 bytes wide set PVB to true. 11429 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11430 return false; 11431 11432 if (QT->isFloatingType()) 11433 return true; 11434 11435 if (QT->isIntegerType()) 11436 return true; 11437 11438 if (QT->isPointerType()) 11439 return true; 11440 11441 // TODO: Add support for complex types (section 3.1.2, item 2). 11442 11443 return false; 11444 } 11445 11446 /// Computes the lane size (LS) of a return type or of an input parameter, 11447 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11448 /// TODO: Add support for references, section 3.2.1, item 1. 11449 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11450 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11451 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11452 if (getAArch64PBV(PTy, C)) 11453 return C.getTypeSize(PTy); 11454 } 11455 if (getAArch64PBV(QT, C)) 11456 return C.getTypeSize(QT); 11457 11458 return C.getTypeSize(C.getUIntPtrType()); 11459 } 11460 11461 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11462 // signature of the scalar function, as defined in 3.2.2 of the 11463 // AAVFABI. 
11464 static std::tuple<unsigned, unsigned, bool> 11465 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11466 QualType RetType = FD->getReturnType().getCanonicalType(); 11467 11468 ASTContext &C = FD->getASTContext(); 11469 11470 bool OutputBecomesInput = false; 11471 11472 llvm::SmallVector<unsigned, 8> Sizes; 11473 if (!RetType->isVoidType()) { 11474 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11475 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11476 OutputBecomesInput = true; 11477 } 11478 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11479 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11480 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11481 } 11482 11483 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11484 // The LS of a function parameter / return value can only be a power 11485 // of 2, starting from 8 bits, up to 128. 11486 assert(std::all_of(Sizes.begin(), Sizes.end(), 11487 [](unsigned Size) { 11488 return Size == 8 || Size == 16 || Size == 32 || 11489 Size == 64 || Size == 128; 11490 }) && 11491 "Invalid size"); 11492 11493 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11494 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11495 OutputBecomesInput); 11496 } 11497 11498 /// Mangle the parameter part of the vector function name according to 11499 /// their OpenMP classification. The mangling function is defined in 11500 /// section 3.5 of the AAVFABI. 11501 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11502 SmallString<256> Buffer; 11503 llvm::raw_svector_ostream Out(Buffer); 11504 for (const auto &ParamAttr : ParamAttrs) { 11505 switch (ParamAttr.Kind) { 11506 case LinearWithVarStride: 11507 Out << "ls" << ParamAttr.StrideOrArg; 11508 break; 11509 case Linear: 11510 Out << 'l'; 11511 // Don't print the step value if it is not present or if it is 11512 // equal to 1. 
11513 if (ParamAttr.StrideOrArg != 1) 11514 Out << ParamAttr.StrideOrArg; 11515 break; 11516 case Uniform: 11517 Out << 'u'; 11518 break; 11519 case Vector: 11520 Out << 'v'; 11521 break; 11522 } 11523 11524 if (!!ParamAttr.Alignment) 11525 Out << 'a' << ParamAttr.Alignment; 11526 } 11527 11528 return std::string(Out.str()); 11529 } 11530 11531 // Function used to add the attribute. The parameter `VLEN` is 11532 // templated to allow the use of "x" when targeting scalable functions 11533 // for SVE. 11534 template <typename T> 11535 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11536 char ISA, StringRef ParSeq, 11537 StringRef MangledName, bool OutputBecomesInput, 11538 llvm::Function *Fn) { 11539 SmallString<256> Buffer; 11540 llvm::raw_svector_ostream Out(Buffer); 11541 Out << Prefix << ISA << LMask << VLEN; 11542 if (OutputBecomesInput) 11543 Out << "v"; 11544 Out << ParSeq << "_" << MangledName; 11545 Fn->addFnAttr(Out.str()); 11546 } 11547 11548 // Helper function to generate the Advanced SIMD names depending on 11549 // the value of the NDS when simdlen is not present. 
11550 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11551 StringRef Prefix, char ISA, 11552 StringRef ParSeq, StringRef MangledName, 11553 bool OutputBecomesInput, 11554 llvm::Function *Fn) { 11555 switch (NDS) { 11556 case 8: 11557 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11558 OutputBecomesInput, Fn); 11559 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11560 OutputBecomesInput, Fn); 11561 break; 11562 case 16: 11563 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11564 OutputBecomesInput, Fn); 11565 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11566 OutputBecomesInput, Fn); 11567 break; 11568 case 32: 11569 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11570 OutputBecomesInput, Fn); 11571 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11572 OutputBecomesInput, Fn); 11573 break; 11574 case 64: 11575 case 128: 11576 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11577 OutputBecomesInput, Fn); 11578 break; 11579 default: 11580 llvm_unreachable("Scalar type is too wide."); 11581 } 11582 } 11583 11584 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11585 static void emitAArch64DeclareSimdFunction( 11586 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11587 ArrayRef<ParamAttrTy> ParamAttrs, 11588 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11589 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11590 11591 // Get basic data for building the vector signature. 11592 const auto Data = getNDSWDS(FD, ParamAttrs); 11593 const unsigned NDS = std::get<0>(Data); 11594 const unsigned WDS = std::get<1>(Data); 11595 const bool OutputBecomesInput = std::get<2>(Data); 11596 11597 // Check the values provided via `simdlen` by the user. 11598 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11599 if (UserVLEN == 1) { 11600 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11601 DiagnosticsEngine::Warning, 11602 "The clause simdlen(1) has no effect when targeting aarch64."); 11603 CGM.getDiags().Report(SLoc, DiagID); 11604 return; 11605 } 11606 11607 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11608 // Advanced SIMD output. 11609 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11610 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11611 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11612 "power of 2 when targeting Advanced SIMD."); 11613 CGM.getDiags().Report(SLoc, DiagID); 11614 return; 11615 } 11616 11617 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11618 // limits. 11619 if (ISA == 's' && UserVLEN != 0) { 11620 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11621 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11622 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11623 "lanes in the architectural constraints " 11624 "for SVE (min is 128-bit, max is " 11625 "2048-bit, by steps of 128-bit)"); 11626 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11627 return; 11628 } 11629 } 11630 11631 // Sort out parameter sequence. 11632 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11633 StringRef Prefix = "_ZGV"; 11634 // Generate simdlen from user input (if any). 11635 if (UserVLEN) { 11636 if (ISA == 's') { 11637 // SVE generates only a masked function. 11638 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11639 OutputBecomesInput, Fn); 11640 } else { 11641 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11642 // Advanced SIMD generates one or two functions, depending on 11643 // the `[not]inbranch` clause. 
11644 switch (State) { 11645 case OMPDeclareSimdDeclAttr::BS_Undefined: 11646 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11647 OutputBecomesInput, Fn); 11648 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11649 OutputBecomesInput, Fn); 11650 break; 11651 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11652 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11653 OutputBecomesInput, Fn); 11654 break; 11655 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11656 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11657 OutputBecomesInput, Fn); 11658 break; 11659 } 11660 } 11661 } else { 11662 // If no user simdlen is provided, follow the AAVFABI rules for 11663 // generating the vector length. 11664 if (ISA == 's') { 11665 // SVE, section 3.4.1, item 1. 11666 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11667 OutputBecomesInput, Fn); 11668 } else { 11669 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11670 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11671 // two vector names depending on the use of the clause 11672 // `[not]inbranch`. 
11673 switch (State) { 11674 case OMPDeclareSimdDeclAttr::BS_Undefined: 11675 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11676 OutputBecomesInput, Fn); 11677 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11678 OutputBecomesInput, Fn); 11679 break; 11680 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11681 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11682 OutputBecomesInput, Fn); 11683 break; 11684 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11685 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11686 OutputBecomesInput, Fn); 11687 break; 11688 } 11689 } 11690 } 11691 } 11692 11693 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11694 llvm::Function *Fn) { 11695 ASTContext &C = CGM.getContext(); 11696 FD = FD->getMostRecentDecl(); 11697 // Map params to their positions in function decl. 11698 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11699 if (isa<CXXMethodDecl>(FD)) 11700 ParamPositions.try_emplace(FD, 0); 11701 unsigned ParamPos = ParamPositions.size(); 11702 for (const ParmVarDecl *P : FD->parameters()) { 11703 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11704 ++ParamPos; 11705 } 11706 while (FD) { 11707 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11708 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11709 // Mark uniform parameters. 11710 for (const Expr *E : Attr->uniforms()) { 11711 E = E->IgnoreParenImpCasts(); 11712 unsigned Pos; 11713 if (isa<CXXThisExpr>(E)) { 11714 Pos = ParamPositions[FD]; 11715 } else { 11716 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11717 ->getCanonicalDecl(); 11718 Pos = ParamPositions[PVD]; 11719 } 11720 ParamAttrs[Pos].Kind = Uniform; 11721 } 11722 // Get alignment info. 
11723 auto NI = Attr->alignments_begin(); 11724 for (const Expr *E : Attr->aligneds()) { 11725 E = E->IgnoreParenImpCasts(); 11726 unsigned Pos; 11727 QualType ParmTy; 11728 if (isa<CXXThisExpr>(E)) { 11729 Pos = ParamPositions[FD]; 11730 ParmTy = E->getType(); 11731 } else { 11732 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11733 ->getCanonicalDecl(); 11734 Pos = ParamPositions[PVD]; 11735 ParmTy = PVD->getType(); 11736 } 11737 ParamAttrs[Pos].Alignment = 11738 (*NI) 11739 ? (*NI)->EvaluateKnownConstInt(C) 11740 : llvm::APSInt::getUnsigned( 11741 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11742 .getQuantity()); 11743 ++NI; 11744 } 11745 // Mark linear parameters. 11746 auto SI = Attr->steps_begin(); 11747 auto MI = Attr->modifiers_begin(); 11748 for (const Expr *E : Attr->linears()) { 11749 E = E->IgnoreParenImpCasts(); 11750 unsigned Pos; 11751 // Rescaling factor needed to compute the linear parameter 11752 // value in the mangled name. 11753 unsigned PtrRescalingFactor = 1; 11754 if (isa<CXXThisExpr>(E)) { 11755 Pos = ParamPositions[FD]; 11756 } else { 11757 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11758 ->getCanonicalDecl(); 11759 Pos = ParamPositions[PVD]; 11760 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11761 PtrRescalingFactor = CGM.getContext() 11762 .getTypeSizeInChars(P->getPointeeType()) 11763 .getQuantity(); 11764 } 11765 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11766 ParamAttr.Kind = Linear; 11767 // Assuming a stride of 1, for `linear` without modifiers. 
11768 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11769 if (*SI) { 11770 Expr::EvalResult Result; 11771 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11772 if (const auto *DRE = 11773 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11774 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11775 ParamAttr.Kind = LinearWithVarStride; 11776 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11777 ParamPositions[StridePVD->getCanonicalDecl()]); 11778 } 11779 } 11780 } else { 11781 ParamAttr.StrideOrArg = Result.Val.getInt(); 11782 } 11783 } 11784 // If we are using a linear clause on a pointer, we need to 11785 // rescale the value of linear_step with the byte size of the 11786 // pointee type. 11787 if (Linear == ParamAttr.Kind) 11788 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11789 ++SI; 11790 ++MI; 11791 } 11792 llvm::APSInt VLENVal; 11793 SourceLocation ExprLoc; 11794 const Expr *VLENExpr = Attr->getSimdlen(); 11795 if (VLENExpr) { 11796 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11797 ExprLoc = VLENExpr->getExprLoc(); 11798 } 11799 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11800 if (CGM.getTriple().isX86()) { 11801 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11802 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11803 unsigned VLEN = VLENVal.getExtValue(); 11804 StringRef MangledName = Fn->getName(); 11805 if (CGM.getTarget().hasFeature("sve")) 11806 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11807 MangledName, 's', 128, Fn, ExprLoc); 11808 if (CGM.getTarget().hasFeature("neon")) 11809 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11810 MangledName, 'n', 128, Fn, ExprLoc); 11811 } 11812 } 11813 FD = FD->getPreviousDecl(); 11814 } 11815 } 11816 11817 namespace { 11818 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // Arguments for the finalization call (ident_t *loc, kmp_int32 gtid),
  // captured by value so they outlive the scope that pushed the cleanup.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit __kmpc_doacross_init with one kmp_dim entry per loop dimension and
/// push a cleanup that emits the matching __kmpc_doacross_fini on exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    // Cache the record type; subsequent calls reuse it.
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; 'lo' stays at its null-initialized value 0.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Arrange for __kmpc_doacross_fini to run on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink)') with the loop iteration vector widened to kmp_int64.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop counter value, converted to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at \p Loc, using the nounwind form when the
/// callee is a function known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation
Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Delegates to emitCall, which handles debug location and nounwind calls.
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Records that a 'declare target' region has been emitted when the function
/// being started carries the declare-target attribute.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Host default: the native parameter is used directly, TargetParam is
/// ignored (device runtimes override this).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Returns the address to use for local variable \p VD, taking into account
/// untied-task local-variable remapping and the OpenMP 'allocate' directive.
/// For allocatable decls with a non-default allocator this emits a
/// __kmpc_alloc call and registers a matching __kmpc_free cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // In untied tasks, locals may be redirected to task-private storage;
  // UntiedAddr is the pointer slot, UntiedRealAddr the real storage.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks also store the allocated pointer in the task frame.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location captured as a raw encoding so the cleanup stays POD.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // Untied tasks may resume in a different part; emit the switch point.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD is a redirected local of the untied task currently
/// being emitted in \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// RAII that pushes the set of declarations named in 'nontemporal' clauses of
/// \p S onto the runtime's stack for the lifetime of the directive.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  // Pop only if the constructor pushed an entry.
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// RAII that registers the untied task's redirected local variables
/// (original address / real storage pairs) for the current function.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Map the current function to the stack slot that holds its locals.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// Returns true if \p VD appears in any active 'nontemporal' clause set.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collects declarations used in \p S that must NOT be treated as
/// lastprivate-conditional candidates inside the inner region (captures of
/// target/task regions and variables privatized by any data-sharing clause),
/// and reports those that are currently tracked (and not already disabled)
/// via \p NeedToAddForLPCsAsDisabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  // The five loops below are intentionally parallel: one per privatizing
  // clause kind, each collecting scalar DeclRefExpr variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Report collected decls that are tracked in an enclosing (non-disabled)
  // lastprivate-conditional frame; innermost frame wins via 'break'.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Push-constructor: if OpenMP >= 5.0 and \p S has a
/// lastprivate(conditional:) clause, pushes a tracking frame that maps each
/// listed variable to a unique global name.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ?
ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique "pl_cond" name used
    // for the internal globals that track its last value.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Disable-constructor: pushes a "disabled" frame listing variables for which
/// lastprivate-conditional analysis must be suppressed inside \p S.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries carry no unique name; only membership matters.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

/// Named factory for the disable-constructor above.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop exactly the kind of frame this RAII pushed, if any.
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Creates (or reuses) the per-function { value, Fired } wrapper struct for a
/// lastprivate-conditional variable, resets its Fired flag to 0, and returns
/// the address of the value field as the variable's private storage.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // NOTE(review): "lasprivate.conditional" (sic) — long-standing typo; the
    // record name may surface in generated IR type names, so renaming it is
    // not a comment-level fix.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered for this function: reuse the cached tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; (reset the "was updated" flag before entering the region)
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Active lastprivate-conditional frames, searched innermost-first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Data captured for the matched reference (see getFoundData()).
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      // A disabled frame shadows any outer tracking of the same decl.
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class (this->x) are tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse only into glvalue sub-expressions and non-expression children.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, IV lvalue, owning function) for the
  /// matched reference; members are null/empty if nothing matched.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits the "conditional" update for a lastprivate(conditional:) variable:
/// inside a critical section (named by \p UniqueDeclName), compares the
/// current loop iteration against the globally recorded one and, if not
/// older, stores the private value and iteration into internal globals.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // Signedness of the comparison follows the IV's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS references a tracked lastprivate-conditional variable, emits the
/// corresponding update: either the critical-section update (same function)
/// or, for inner parallel regions, an atomic store of the Fired flag in the
/// outer function's wrapper struct.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private value's address as the wrapper struct so the
    // Fired field (laid out right after the value) can be addressed.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic/volatile store: other threads of the inner region may also set
    // the flag concurrently.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// At the end of directive \p D, checks each tracked lastprivate-conditional
/// variable captured by the region: if its Fired flag was set by an inner
/// region, emits the deferred critical-section update for it.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost non-disabled frame; it must belong to this function.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    // NOTE(review): "rehistered" (sic) is a typo for "registered" in the
    // assert message below; assert-only text, no behavioral impact.
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

/// After the loop finishes, copies the recorded "last" value from the
/// internal global (if one was created, i.e. the variable was ever updated)
/// back into the original variable \p PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: used when only 'simd' constructs are enabled
// (-fopenmp-simd). All entry points that would require the full OpenMP
// runtime are unreachable stubs; only simple reductions are supported.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// The one supported case: simple (non-runtime) reductions delegate to the
// base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
// CGOpenMPSIMDRuntime, continued: in SIMD-only mode (-fopenmp-simd) no host
// OpenMP runtime calls may be emitted, so these entry points abort if reached.

/// Fixups for task reduction item #N — unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Address of a task-reduction data item — unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// 'taskwait' construct — requires the runtime; unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// 'cancellation point' construct — unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// 'cancel' construct — unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Outlining of a 'target' region — offloading needs the runtime; unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Invocation of a 'target' region — unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Registration of device functions for offloading — unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Registration of device global variables for offloading — unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Returns false: this runtime never claims a global for target-specific
/// emission, so the caller falls back to ordinary codegen for GD.
/// NOTE(review): the false-means-"not handled" contract is inferred from the
/// return value alone — confirm against the base-class declaration.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

/// 'teams' construct — requires the runtime; unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// 'num_teams' / 'thread_limit' clauses — unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// 'target data' region mapping calls — unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Stand-alone target data directives ('enter/exit data', 'update') —
/// unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Doacross ('ordered' with 'depend') loop initialization — unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Doacross dependence ('depend' clause on 'ordered') — unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Translation of a captured field to a target-side parameter — unreachable.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Address of a translated target-side parameter — unreachable.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}