//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Every region records which flavour of region it is (see
/// CGOpenMPRegionKind), the code generation callback for its body, the
/// OpenMP directive it was created for and whether the region contains a
/// cancellation construct. Derived classes are distinguished through the
/// classof() predicates, keyed on the stored region kind.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates a region that is backed by the captured statement \p CS.
  /// \param CS Captured statement associated with the region.
  /// \param RegionKind Flavour of the region, reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind OpenMP directive the region is emitted for.
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates a region without an associated captured statement; only the
  /// CR_OpenMP kind is forwarded to the base class.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing; region kinds that support untied tasks
  /// override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the flavour of this region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag the region was constructed with.
  bool hasCancel() const { return HasCancel; }

  /// Any captured statement info of kind CR_OpenMP is an OpenMP region.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Flavour of the region; used by the classof() of derived classes.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive associated with the region.
  OpenMPDirectiveKind Kind;
  /// Flag passed at construction and reported by hasCancel().
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
///
/// Region info for regions of kind ParallelOutlinedRegion. It stores the
/// thread id variable and the name to use for the capture helper.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement associated with the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null.
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind OpenMP directive the region is emitted for.
  /// \param HasCancel Value reported by hasCancel().
  /// \param HelperName Name reported by getHelperName(). It is stored as a
  /// StringRef, so the caller presumably has to keep the underlying storage
  /// alive while this object is in use.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// True for OpenMP regions whose kind is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper, as supplied to the constructor.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 bool NoInheritance = false; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel, 421 bool NoInheritance = true) 422 : CGF(CGF), NoInheritance(NoInheritance) { 423 // Start emission for the construct. 424 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 425 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 426 if (NoInheritance) { 427 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 428 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 429 CGF.LambdaThisCaptureField = nullptr; 430 BlockInfo = CGF.BlockInfo; 431 CGF.BlockInfo = nullptr; 432 } 433 } 434 435 ~InlinedOpenMPRegionRAII() { 436 // Restore original CapturedStmtInfo only if we're done with code emission. 
437 auto *OldCSI = 438 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 439 delete CGF.CapturedStmtInfo; 440 CGF.CapturedStmtInfo = OldCSI; 441 if (NoInheritance) { 442 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 443 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 444 CGF.BlockInfo = BlockInfo; 445 } 446 } 447 }; 448 449 /// Values for bit flags used in the ident_t to describe the fields. 450 /// All enumeric elements are named and described in accordance with the code 451 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 452 enum OpenMPLocationFlags : unsigned { 453 /// Use trampoline for internal microtask. 454 OMP_IDENT_IMD = 0x01, 455 /// Use c-style ident structure. 456 OMP_IDENT_KMPC = 0x02, 457 /// Atomic reduction option for kmpc_reduce. 458 OMP_ATOMIC_REDUCE = 0x10, 459 /// Explicit 'barrier' directive. 460 OMP_IDENT_BARRIER_EXPL = 0x20, 461 /// Implicit barrier in code. 462 OMP_IDENT_BARRIER_IMPL = 0x40, 463 /// Implicit barrier in 'for' directive. 464 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 465 /// Implicit barrier in 'sections' directive. 466 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 467 /// Implicit barrier in 'single' directive. 468 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 469 /// Call of __kmp_for_static_init for static loop. 470 OMP_IDENT_WORK_LOOP = 0x200, 471 /// Call of __kmp_for_static_init for sections. 472 OMP_IDENT_WORK_SECTIONS = 0x400, 473 /// Call of __kmp_for_static_init for distribute. 474 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 475 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 476 }; 477 478 namespace { 479 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 480 /// Values for bit flags for marking which requires clauses have been used. 481 enum OpenMPOffloadingRequiresDirFlags : int64_t { 482 /// flag undefined. 483 OMP_REQ_UNDEFINED = 0x000, 484 /// no requires clause present. 485 OMP_REQ_NONE = 0x001, 486 /// reverse_offload clause. 
487 OMP_REQ_REVERSE_OFFLOAD = 0x002, 488 /// unified_address clause. 489 OMP_REQ_UNIFIED_ADDRESS = 0x004, 490 /// unified_shared_memory clause. 491 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 492 /// dynamic_allocators clause. 493 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 494 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 495 }; 496 497 enum OpenMPOffloadingReservedDeviceIDs { 498 /// Device ID if the device was not defined, runtime should get it 499 /// from environment variables in the spec. 500 OMP_DEVICEID_UNDEF = -1, 501 }; 502 } // anonymous namespace 503 504 /// Describes ident structure that describes a source location. 505 /// All descriptions are taken from 506 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 507 /// Original structure: 508 /// typedef struct ident { 509 /// kmp_int32 reserved_1; /**< might be used in Fortran; 510 /// see above */ 511 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 512 /// KMP_IDENT_KMPC identifies this union 513 /// member */ 514 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 515 /// see above */ 516 ///#if USE_ITT_BUILD 517 /// /* but currently used for storing 518 /// region-specific ITT */ 519 /// /* contextual information. */ 520 ///#endif /* USE_ITT_BUILD */ 521 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 522 /// C++ */ 523 /// char const *psource; /**< String describing the source location. 524 /// The string is composed of semi-colon separated 525 // fields which describe the source file, 526 /// the function and a pair of line numbers that 527 /// delimit the construct. 528 /// */ 529 /// } ident_t; 530 enum IdentFieldIndex { 531 /// might be used in Fortran 532 IdentField_Reserved_1, 533 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
534 IdentField_Flags, 535 /// Not really used in Fortran any more 536 IdentField_Reserved_2, 537 /// Source[4] in Fortran, do not use for C++ 538 IdentField_Reserved_3, 539 /// String describing the source location. The string is composed of 540 /// semi-colon separated fields which describe the source file, the function 541 /// and a pair of line numbers that delimit the construct. 542 IdentField_PSource 543 }; 544 545 /// Schedule types for 'omp for' loops (these enumerators are taken from 546 /// the enum sched_type in kmp.h). 547 enum OpenMPSchedType { 548 /// Lower bound for default (unordered) versions. 549 OMP_sch_lower = 32, 550 OMP_sch_static_chunked = 33, 551 OMP_sch_static = 34, 552 OMP_sch_dynamic_chunked = 35, 553 OMP_sch_guided_chunked = 36, 554 OMP_sch_runtime = 37, 555 OMP_sch_auto = 38, 556 /// static with chunk adjustment (e.g., simd) 557 OMP_sch_static_balanced_chunked = 45, 558 /// Lower bound for 'ordered' versions. 559 OMP_ord_lower = 64, 560 OMP_ord_static_chunked = 65, 561 OMP_ord_static = 66, 562 OMP_ord_dynamic_chunked = 67, 563 OMP_ord_guided_chunked = 68, 564 OMP_ord_runtime = 69, 565 OMP_ord_auto = 70, 566 OMP_sch_default = OMP_sch_static, 567 /// dist_schedule types 568 OMP_dist_sch_static_chunked = 91, 569 OMP_dist_sch_static = 92, 570 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 571 /// Set if the monotonic schedule modifier was present. 572 OMP_sch_modifier_monotonic = (1 << 29), 573 /// Set if the nonmonotonic schedule modifier was present. 574 OMP_sch_modifier_nonmonotonic = (1 << 30), 575 }; 576 577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 578 /// region. 
579 class CleanupTy final : public EHScopeStack::Cleanup { 580 PrePostActionTy *Action; 581 582 public: 583 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 584 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 585 if (!CGF.HaveInsertPoint()) 586 return; 587 Action->Exit(CGF); 588 } 589 }; 590 591 } // anonymous namespace 592 593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 594 CodeGenFunction::RunCleanupsScope Scope(CGF); 595 if (PrePostAction) { 596 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 597 Callback(CodeGen, CGF, *PrePostAction); 598 } else { 599 PrePostActionTy Action; 600 Callback(CodeGen, CGF, Action); 601 } 602 } 603 604 /// Check if the combiner is a call to UDR combiner and if it is so return the 605 /// UDR decl used for reduction. 606 static const OMPDeclareReductionDecl * 607 getReductionInit(const Expr *ReductionOp) { 608 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 609 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 610 if (const auto *DRE = 611 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 612 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 613 return DRD; 614 return nullptr; 615 } 616 617 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 618 const OMPDeclareReductionDecl *DRD, 619 const Expr *InitOp, 620 Address Private, Address Original, 621 QualType Ty) { 622 if (DRD->getInitializer()) { 623 std::pair<llvm::Function *, llvm::Function *> Reduction = 624 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 625 const auto *CE = cast<CallExpr>(InitOp); 626 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 627 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 628 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 629 const auto *LHSDRE = 630 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 631 const auto *RHSDRE = 632 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 633 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 634 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 635 [=]() { return Private; }); 636 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 637 [=]() { return Original; }); 638 (void)PrivateScope.Privatize(); 639 RValue Func = RValue::get(Reduction.second); 640 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 641 CGF.EmitIgnoredExpr(InitOp); 642 } else { 643 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 644 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 645 auto *GV = new llvm::GlobalVariable( 646 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 647 llvm::GlobalValue::PrivateLinkage, Init, Name); 648 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 649 RValue InitRVal; 650 switch (CGF.getEvaluationKind(Ty)) { 651 case TEK_Scalar: 652 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 653 break; 654 case TEK_Complex: 655 InitRVal = 656 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 657 break; 658 case TEK_Aggregate: { 659 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); 660 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); 661 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 662 /*IsInitializer=*/false); 663 return; 664 } 665 } 666 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 667 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 668 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 669 /*IsInitializer=*/false); 670 } 671 } 672 673 /// Emit initialization of arrays of complex types. 674 /// \param DestAddr Address of the array. 675 /// \param Type Type of array. 676 /// \param Init Initial expression of array. 677 /// \param SrcAddr Address of the original array. 
678 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 679 QualType Type, bool EmitDeclareReductionInit, 680 const Expr *Init, 681 const OMPDeclareReductionDecl *DRD, 682 Address SrcAddr = Address::invalid()) { 683 // Perform element-by-element initialization. 684 QualType ElementTy; 685 686 // Drill down to the base element type on both arrays. 687 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 688 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 689 DestAddr = 690 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 746 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 747 } 748 749 // Shift the address forward by one element. 750 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 751 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 752 // Check whether we've reached the end. 753 llvm::Value *Done = 754 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 755 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 756 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 757 758 // Done. 
759 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 760 } 761 762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 763 return CGF.EmitOMPSharedLValue(E); 764 } 765 766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 767 const Expr *E) { 768 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 769 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 770 return LValue(); 771 } 772 773 void ReductionCodeGen::emitAggregateInitialization( 774 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 775 const OMPDeclareReductionDecl *DRD) { 776 // Emit VarDecl with copy init for arrays. 777 // Get the address of the original variable captured in current 778 // captured region. 779 const auto *PrivateVD = 780 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 781 bool EmitDeclareReductionInit = 782 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 783 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 784 EmitDeclareReductionInit, 785 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 786 : PrivateVD->getInit(), 787 DRD, SharedLVal.getAddress(CGF)); 788 } 789 790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 791 ArrayRef<const Expr *> Origs, 792 ArrayRef<const Expr *> Privates, 793 ArrayRef<const Expr *> ReductionOps) { 794 ClausesData.reserve(Shareds.size()); 795 SharedAddresses.reserve(Shareds.size()); 796 Sizes.reserve(Shareds.size()); 797 BaseDecls.reserve(Shareds.size()); 798 const auto *IOrig = Origs.begin(); 799 const auto *IPriv = Privates.begin(); 800 const auto *IRed = ReductionOps.begin(); 801 for (const Expr *Ref : Shareds) { 802 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 803 std::advance(IOrig, 1); 804 std::advance(IPriv, 1); 805 std::advance(IRed, 1); 806 } 807 } 808 809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 810 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 811 "Number of generated lvalues must be exactly N."); 812 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 813 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 814 SharedAddresses.emplace_back(First, Second); 815 if (ClausesData[N].Shared == ClausesData[N].Ref) { 816 OrigAddresses.emplace_back(First, Second); 817 } else { 818 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 819 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 820 OrigAddresses.emplace_back(First, Second); 821 } 822 } 823 824 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 825 const auto *PrivateVD = 826 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 827 QualType PrivateType = PrivateVD->getType(); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value 
*SizeInChars; 837 auto *ElemType = 838 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 839 ->getElementType(); 840 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 841 if (AsArraySection) { 842 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 843 OrigAddresses[N].first.getPointer(CGF)); 844 Size = CGF.Builder.CreateNUWAdd( 845 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 846 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 847 } else { 848 SizeInChars = 849 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 850 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 851 } 852 Sizes.emplace_back(SizeInChars, Size); 853 CodeGenFunction::OpaqueValueMapping OpaqueMap( 854 CGF, 855 cast<OpaqueValueExpr>( 856 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 857 RValue::get(Size)); 858 CGF.EmitVariablyModifiedType(PrivateType); 859 } 860 861 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 862 llvm::Value *Size) { 863 const auto *PrivateVD = 864 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 865 QualType PrivateType = PrivateVD->getType(); 866 if (!PrivateType->isVariablyModifiedType()) { 867 assert(!Size && !Sizes[N].second && 868 "Size should be nullptr for non-variably modified reduction " 869 "items."); 870 return; 871 } 872 CodeGenFunction::OpaqueValueMapping OpaqueMap( 873 CGF, 874 cast<OpaqueValueExpr>( 875 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 876 RValue::get(Size)); 877 CGF.EmitVariablyModifiedType(PrivateType); 878 } 879 880 void ReductionCodeGen::emitInitialization( 881 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 882 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 883 assert(SharedAddresses.size() > N && "No variable was generated"); 884 const auto *PrivateVD = 885 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 886 const OMPDeclareReductionDecl *DRD = 887 getReductionInit(ClausesData[N].ReductionOp); 888 QualType PrivateType = PrivateVD->getType(); 889 PrivateAddr = CGF.Builder.CreateElementBitCast( 890 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 891 QualType SharedType = SharedAddresses[N].first.getType(); 892 SharedLVal = CGF.MakeAddrLValue( 893 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 894 CGF.ConvertTypeForMem(SharedType)), 895 SharedType, SharedAddresses[N].first.getBaseInfo(), 896 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 897 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 898 if (DRD && DRD->getInitializer()) 899 (void)DefaultInit(CGF); 900 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 901 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 902 (void)DefaultInit(CGF); 903 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 904 PrivateAddr, SharedLVal.getAddress(CGF), 905 SharedLVal.getType()); 906 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 907 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 908 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 909 PrivateVD->getType().getQualifiers(), 910 /*IsInitializer=*/false); 911 } 912 } 913 914 bool ReductionCodeGen::needCleanups(unsigned N) { 915 const auto *PrivateVD = 916 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 917 QualType PrivateType = PrivateVD->getType(); 918 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 919 return DTorKind != QualType::DK_none; 920 } 921 922 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 923 Address PrivateAddr) { 924 const auto *PrivateVD = 925 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 926 QualType PrivateType = PrivateVD->getType(); 927 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 928 if (needCleanups(N)) { 929 PrivateAddr = CGF.Builder.CreateElementBitCast( 930 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 931 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 932 } 933 } 934 935 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 936 LValue BaseLV) { 937 BaseTy = BaseTy.getNonReferenceType(); 938 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 939 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 940 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 941 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 942 } else { 943 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 944 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 945 } 946 BaseTy = BaseTy->getPointeeType(); 947 } 948 return CGF.MakeAddrLValue( 949 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 950 CGF.ConvertTypeForMem(ElTy)), 951 BaseLV.getType(), BaseLV.getBaseInfo(), 952 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 953 } 954 955 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 956 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 957 llvm::Value *Addr) { 958 Address Tmp = Address::invalid(); 959 Address TopTmp = Address::invalid(); 960 Address MostTopTmp = Address::invalid(); 961 BaseTy = BaseTy.getNonReferenceType(); 962 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 963 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 964 Tmp = CGF.CreateMemTemp(BaseTy); 965 if (TopTmp.isValid()) 966 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 967 else 968 MostTopTmp = Tmp; 969 TopTmp = Tmp; 970 BaseTy = BaseTy->getPointeeType(); 971 } 972 llvm::Type *Ty = BaseLVType; 973 if (Tmp.isValid()) 974 Ty = Tmp.getElementType(); 975 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 976 if (Tmp.isValid()) { 977 CGF.Builder.CreateStore(Addr, Tmp); 978 return MostTopTmp; 979 } 980 return 
Address(Addr, BaseLVAlignment); 981 } 982 983 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 984 const VarDecl *OrigVD = nullptr; 985 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 986 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 987 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 988 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 989 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 990 Base = TempASE->getBase()->IgnoreParenImpCasts(); 991 DE = cast<DeclRefExpr>(Base); 992 OrigVD = cast<VarDecl>(DE->getDecl()); 993 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 994 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 995 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 996 Base = TempASE->getBase()->IgnoreParenImpCasts(); 997 DE = cast<DeclRefExpr>(Base); 998 OrigVD = cast<VarDecl>(DE->getDecl()); 999 } 1000 return OrigVD; 1001 } 1002 1003 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1004 Address PrivateAddr) { 1005 const DeclRefExpr *DE; 1006 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1007 BaseDecls.emplace_back(OrigVD); 1008 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1009 LValue BaseLValue = 1010 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue); 1012 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1013 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1014 llvm::Value *PrivatePointer = 1015 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1016 PrivateAddr.getPointer(), 1017 SharedAddresses[N].first.getAddress(CGF).getType()); 1018 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1019 return castToBase(CGF, OrigVD->getType(), 1020 SharedAddresses[N].first.getType(), 1021 OriginalBaseLValue.getAddress(CGF).getType(), 1022 OriginalBaseLValue.getAlignment(), Ptr); 1023 } 
1024 BaseDecls.emplace_back( 1025 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1026 return PrivateAddr; 1027 } 1028 1029 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1030 const OMPDeclareReductionDecl *DRD = 1031 getReductionInit(ClausesData[N].ReductionOp); 1032 return DRD && DRD->getInitializer(); 1033 } 1034 1035 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1036 return CGF.EmitLoadOfPointerLValue( 1037 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1038 getThreadIDVariable()->getType()->castAs<PointerType>()); 1039 } 1040 1041 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 1042 if (!CGF.HaveInsertPoint()) 1043 return; 1044 // 1.2.2 OpenMP Language Terminology 1045 // Structured block - An executable statement with a single entry at the 1046 // top and a single exit at the bottom. 1047 // The point of exit cannot be a branch out of the structured block. 1048 // longjmp() and throw() must not violate the entry/exit criteria. 
1049 CGF.EHStack.pushTerminate(); 1050 if (S) 1051 CGF.incrementProfileCounter(S); 1052 CodeGen(CGF); 1053 CGF.EHStack.popTerminate(); 1054 } 1055 1056 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1057 CodeGenFunction &CGF) { 1058 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1059 getThreadIDVariable()->getType(), 1060 AlignmentSource::Decl); 1061 } 1062 1063 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1064 QualType FieldTy) { 1065 auto *Field = FieldDecl::Create( 1066 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1067 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1068 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1069 Field->setAccess(AS_public); 1070 DC->addDecl(Field); 1071 return Field; 1072 } 1073 1074 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1075 StringRef Separator) 1076 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1077 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1078 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1079 1080 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1081 OMPBuilder.initialize(); 1082 loadOffloadInfoMetadata(); 1083 } 1084 1085 void CGOpenMPRuntime::clear() { 1086 InternalVars.clear(); 1087 // Clean non-target variable declarations possibly used only in debug info. 
1088 for (const auto &Data : EmittedNonTargetVariables) { 1089 if (!Data.getValue().pointsToAliveValue()) 1090 continue; 1091 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1092 if (!GV) 1093 continue; 1094 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1095 continue; 1096 GV->eraseFromParent(); 1097 } 1098 } 1099 1100 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1101 SmallString<128> Buffer; 1102 llvm::raw_svector_ostream OS(Buffer); 1103 StringRef Sep = FirstSeparator; 1104 for (StringRef Part : Parts) { 1105 OS << Sep << Part; 1106 Sep = Separator; 1107 } 1108 return std::string(OS.str()); 1109 } 1110 1111 static llvm::Function * 1112 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1113 const Expr *CombinerInitializer, const VarDecl *In, 1114 const VarDecl *Out, bool IsCombiner) { 1115 // void .omp_combiner.(Ty *in, Ty *out); 1116 ASTContext &C = CGM.getContext(); 1117 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1118 FunctionArgList Args; 1119 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1120 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1121 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1122 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1123 Args.push_back(&OmpOutParm); 1124 Args.push_back(&OmpInParm); 1125 const CGFunctionInfo &FnInfo = 1126 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1127 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1128 std::string Name = CGM.getOpenMPRuntime().getName( 1129 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1130 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1131 Name, &CGM.getModule()); 1132 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1133 if (CGM.getLangOpts().Optimize) { 1134 Fn->removeFnAttr(llvm::Attribute::NoInline); 1135 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1136 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1137 } 1138 CodeGenFunction CGF(CGM); 1139 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1140 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1141 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1142 Out->getLocation()); 1143 CodeGenFunction::OMPPrivateScope Scope(CGF); 1144 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1145 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1146 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1147 .getAddress(CGF); 1148 }); 1149 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1150 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1151 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1152 .getAddress(CGF); 1153 }); 1154 (void)Scope.Privatize(); 1155 if (!IsCombiner && Out->hasInit() && 1156 !CGF.isTrivialInitializer(Out->getInit())) { 1157 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1158 Out->getType().getQualifiers(), 1159 /*IsInitializer=*/true); 1160 } 1161 if (CombinerInitializer) 1162 CGF.EmitIgnoredExpr(CombinerInitializer); 1163 Scope.ForceCleanup(); 1164 CGF.FinishFunction(); 1165 return Fn; 1166 } 1167 1168 void CGOpenMPRuntime::emitUserDefinedReduction( 1169 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1170 if (UDRMap.count(D) > 0) 1171 return; 1172 llvm::Function *Combiner = emitCombinerOrInitializer( 1173 CGM, D->getType(), D->getCombiner(), 1174 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1175 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1176 /*IsCombiner=*/true); 1177 llvm::Function *Initializer = nullptr; 1178 if (const Expr *Init = D->getInitializer()) { 1179 Initializer = emitCombinerOrInitializer( 1180 CGM, D->getType(), 1181 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1182 : nullptr, 1183 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1184 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1185 /*IsCombiner=*/false); 1186 } 1187 UDRMap.try_emplace(D, Combiner, Initializer); 1188 if (CGF) { 1189 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1190 Decls.second.push_back(D); 1191 } 1192 } 1193 1194 std::pair<llvm::Function *, llvm::Function *> 1195 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1196 auto I = UDRMap.find(D); 1197 if (I != UDRMap.end()) 1198 return I->second; 1199 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1200 return UDRMap.lookup(D); 1201 } 1202 1203 namespace { 1204 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1205 // Builder if one is present. 1206 struct PushAndPopStackRAII { 1207 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1208 bool HasCancel, llvm::omp::Directive Kind) 1209 : OMPBuilder(OMPBuilder) { 1210 if (!OMPBuilder) 1211 return; 1212 1213 // The following callback is the crucial part of clangs cleanup process. 1214 // 1215 // NOTE: 1216 // Once the OpenMPIRBuilder is used to create parallel regions (and 1217 // similar), the cancellation destination (Dest below) is determined via 1218 // IP. That means if we have variables to finalize we split the block at IP, 1219 // use the new block (=BB) as destination to build a JumpDest (via 1220 // getJumpDestInCurrentScope(BB)) which then is fed to 1221 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1222 // to push & pop an FinalizationInfo object. 
1223 // The FiniCB will still be needed but at the point where the 1224 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1225 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1226 assert(IP.getBlock()->end() == IP.getPoint() && 1227 "Clang CG should cause non-terminated block!"); 1228 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1229 CGF.Builder.restoreIP(IP); 1230 CodeGenFunction::JumpDest Dest = 1231 CGF.getOMPCancelDestination(OMPD_parallel); 1232 CGF.EmitBranchThroughCleanup(Dest); 1233 }; 1234 1235 // TODO: Remove this once we emit parallel regions through the 1236 // OpenMPIRBuilder as it can do this setup internally. 1237 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); 1238 OMPBuilder->pushFinalizationCB(std::move(FI)); 1239 } 1240 ~PushAndPopStackRAII() { 1241 if (OMPBuilder) 1242 OMPBuilder->popFinalizationCB(); 1243 } 1244 llvm::OpenMPIRBuilder *OMPBuilder; 1245 }; 1246 } // namespace 1247 1248 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1249 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1250 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1251 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1252 assert(ThreadIDVar->getType()->isPointerType() && 1253 "thread id variable must be of type kmp_int32 *"); 1254 CodeGenFunction CGF(CGM, true); 1255 bool HasCancel = false; 1256 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1257 HasCancel = OPD->hasCancel(); 1258 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1259 HasCancel = OPD->hasCancel(); 1260 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1261 HasCancel = OPSD->hasCancel(); 1262 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1263 HasCancel = OPFD->hasCancel(); 1264 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1265 HasCancel = OPFD->hasCancel(); 1266 else 
if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1267 HasCancel = OPFD->hasCancel(); 1268 else if (const auto *OPFD = 1269 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1270 HasCancel = OPFD->hasCancel(); 1271 else if (const auto *OPFD = 1272 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1273 HasCancel = OPFD->hasCancel(); 1274 1275 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1276 // parallel region to make cancellation barriers work properly. 1277 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1278 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); 1279 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1280 HasCancel, OutlinedHelperName); 1281 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1282 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1283 } 1284 1285 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1286 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1287 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1288 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1289 return emitParallelOrTeamsOutlinedFunction( 1290 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1291 } 1292 1293 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1294 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1295 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1296 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1297 return emitParallelOrTeamsOutlinedFunction( 1298 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1299 } 1300 1301 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1302 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1303 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1304 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1305 bool Tied, unsigned &NumberOfParts) { 1306 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1307 PrePostActionTy &) { 1308 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1309 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1310 llvm::Value *TaskArgs[] = { 1311 UpLoc, ThreadID, 1312 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1313 TaskTVar->getType()->castAs<PointerType>()) 1314 .getPointer(CGF)}; 1315 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1316 CGM.getModule(), OMPRTL___kmpc_omp_task), 1317 TaskArgs); 1318 }; 1319 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1320 UntiedCodeGen); 1321 CodeGen.setAction(Action); 1322 assert(!ThreadIDVar->getType()->isPointerType() && 1323 "thread id variable must be of type kmp_int32 for tasks"); 1324 const OpenMPDirectiveKind Region = 1325 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1326 : OMPD_task; 1327 const CapturedStmt *CS = D.getCapturedStmt(Region); 1328 bool HasCancel = false; 1329 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1330 HasCancel = TD->hasCancel(); 1331 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1332 HasCancel = TD->hasCancel(); 1333 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1334 HasCancel = TD->hasCancel(); 1335 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1336 HasCancel = TD->hasCancel(); 1337 1338 CodeGenFunction CGF(CGM, true); 1339 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1340 InnermostKind, HasCancel, Action); 1341 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1342 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1343 if (!Tied) 1344 NumberOfParts = Action.getNumberOfParts(); 1345 return Res; 1346 } 1347 1348 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1349 const RecordDecl *RD, 
const CGRecordLayout &RL, 1350 ArrayRef<llvm::Constant *> Data) { 1351 llvm::StructType *StructTy = RL.getLLVMType(); 1352 unsigned PrevIdx = 0; 1353 ConstantInitBuilder CIBuilder(CGM); 1354 auto DI = Data.begin(); 1355 for (const FieldDecl *FD : RD->fields()) { 1356 unsigned Idx = RL.getLLVMFieldNo(FD); 1357 // Fill the alignment. 1358 for (unsigned I = PrevIdx; I < Idx; ++I) 1359 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1360 PrevIdx = Idx + 1; 1361 Fields.add(*DI); 1362 ++DI; 1363 } 1364 } 1365 1366 template <class... As> 1367 static llvm::GlobalVariable * 1368 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1369 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1370 As &&... Args) { 1371 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1372 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1373 ConstantInitBuilder CIBuilder(CGM); 1374 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1375 buildStructValue(Fields, CGM, RD, RL, Data); 1376 return Fields.finishAndCreateGlobal( 1377 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1378 std::forward<As>(Args)...); 1379 } 1380 1381 template <typename T> 1382 static void 1383 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1384 ArrayRef<llvm::Constant *> Data, 1385 T &Parent) { 1386 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1387 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1388 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1389 buildStructValue(Fields, CGM, RD, RL, Data); 1390 Fields.finishAndAddTo(Parent); 1391 } 1392 1393 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1394 bool AtCurrentPoint) { 1395 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1396 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1397 1398 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 
1399 if (AtCurrentPoint) { 1400 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1401 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1402 } else { 1403 Elem.second.ServiceInsertPt = 1404 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1405 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1406 } 1407 } 1408 1409 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1410 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1411 if (Elem.second.ServiceInsertPt) { 1412 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1413 Elem.second.ServiceInsertPt = nullptr; 1414 Ptr->eraseFromParent(); 1415 } 1416 } 1417 1418 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1419 SourceLocation Loc, 1420 SmallString<128> &Buffer) { 1421 llvm::raw_svector_ostream OS(Buffer); 1422 // Build debug location 1423 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1424 OS << ";" << PLoc.getFilename() << ";"; 1425 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1426 OS << FD->getQualifiedNameAsString(); 1427 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1428 return OS.str(); 1429 } 1430 1431 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1432 SourceLocation Loc, 1433 unsigned Flags) { 1434 llvm::Constant *SrcLocStr; 1435 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1436 Loc.isInvalid()) { 1437 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1438 } else { 1439 std::string FunctionName = ""; 1440 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1441 FunctionName = FD->getQualifiedNameAsString(); 1442 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1443 const char *FileName = PLoc.getFilename(); 1444 unsigned Line = PLoc.getLine(); 1445 unsigned Column = PLoc.getColumn(); 1446 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1447 Line, Column); 1448 } 1449 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1450 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1451 Reserved2Flags); 1452 } 1453 1454 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1455 SourceLocation Loc) { 1456 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1457 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1458 // the clang invariants used below might be broken. 1459 if (CGM.getLangOpts().OpenMPIRBuilder) { 1460 SmallString<128> Buffer; 1461 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1462 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1463 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1464 return OMPBuilder.getOrCreateThreadID( 1465 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1466 } 1467 1468 llvm::Value *ThreadID = nullptr; 1469 // Check whether we've already cached a load of the thread id in this 1470 // function. 1471 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1472 if (I != OpenMPLocThreadIDMap.end()) { 1473 ThreadID = I->second.ThreadID; 1474 if (ThreadID != nullptr) 1475 return ThreadID; 1476 } 1477 // If exceptions are enabled, do not use parameter to avoid possible crash. 1478 if (auto *OMPRegionInfo = 1479 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1480 if (OMPRegionInfo->getThreadIDVariable()) { 1481 // Check if this an outlined function with thread id passed as argument. 
1482 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1483 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1484 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1485 !CGF.getLangOpts().CXXExceptions || 1486 CGF.Builder.GetInsertBlock() == TopBlock || 1487 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1488 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1489 TopBlock || 1490 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1491 CGF.Builder.GetInsertBlock()) { 1492 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1493 // If value loaded in entry block, cache it and use it everywhere in 1494 // function. 1495 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1496 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1497 Elem.second.ThreadID = ThreadID; 1498 } 1499 return ThreadID; 1500 } 1501 } 1502 } 1503 1504 // This is not an outlined function region - need to call __kmpc_int32 1505 // kmpc_global_thread_num(ident_t *loc). 1506 // Generate thread id value and cache this value for use across the 1507 // function. 
1508 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1509 if (!Elem.second.ServiceInsertPt) 1510 setLocThreadIdInsertPt(CGF); 1511 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1512 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1513 llvm::CallInst *Call = CGF.Builder.CreateCall( 1514 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1515 OMPRTL___kmpc_global_thread_num), 1516 emitUpdateLocation(CGF, Loc)); 1517 Call->setCallingConv(CGF.getRuntimeCC()); 1518 Elem.second.ThreadID = Call; 1519 return Call; 1520 } 1521 1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1523 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1524 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1525 clearLocThreadIdInsertPt(CGF); 1526 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1527 } 1528 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1529 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1530 UDRMap.erase(D); 1531 FunctionUDRMap.erase(CGF.CurFn); 1532 } 1533 auto I = FunctionUDMMap.find(CGF.CurFn); 1534 if (I != FunctionUDMMap.end()) { 1535 for(const auto *D : I->second) 1536 UDMMap.erase(D); 1537 FunctionUDMMap.erase(I); 1538 } 1539 LastprivateConditionalToTypes.erase(CGF.CurFn); 1540 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1541 } 1542 1543 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1544 return OMPBuilder.IdentPtr; 1545 } 1546 1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1548 if (!Kmpc_MicroTy) { 1549 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1550 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1551 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1552 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1553 } 1554 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1555 } 1556 1557 llvm::FunctionCallee 1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1559 assert((IVSize == 32 || IVSize == 64) && 1560 "IV size is not compatible with the omp runtime"); 1561 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1562 : "__kmpc_for_static_init_4u") 1563 : (IVSigned ? "__kmpc_for_static_init_8" 1564 : "__kmpc_for_static_init_8u"); 1565 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1566 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1567 llvm::Type *TypeParams[] = { 1568 getIdentTyPointerTy(), // loc 1569 CGM.Int32Ty, // tid 1570 CGM.Int32Ty, // schedtype 1571 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1572 PtrTy, // p_lower 1573 PtrTy, // p_upper 1574 PtrTy, // p_stride 1575 ITy, // incr 1576 ITy // chunk 1577 }; 1578 auto *FnTy = 1579 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1580 return CGM.CreateRuntimeFunction(FnTy, Name); 1581 } 1582 1583 llvm::FunctionCallee 1584 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1585 assert((IVSize == 32 || IVSize == 64) && 1586 "IV size is not compatible with the omp runtime"); 1587 StringRef Name = 1588 IVSize == 32 1589 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1590 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1591 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1592 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1593 CGM.Int32Ty, // tid 1594 CGM.Int32Ty, // schedtype 1595 ITy, // lower 1596 ITy, // upper 1597 ITy, // stride 1598 ITy // chunk 1599 }; 1600 auto *FnTy = 1601 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1602 return CGM.CreateRuntimeFunction(FnTy, Name); 1603 } 1604 1605 llvm::FunctionCallee 1606 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1607 assert((IVSize == 32 || IVSize == 64) && 1608 "IV size is not compatible with the omp runtime"); 1609 StringRef Name = 1610 IVSize == 32 1611 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1612 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1613 llvm::Type *TypeParams[] = { 1614 getIdentTyPointerTy(), // loc 1615 CGM.Int32Ty, // tid 1616 }; 1617 auto *FnTy = 1618 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1619 return CGM.CreateRuntimeFunction(FnTy, Name); 1620 } 1621 1622 llvm::FunctionCallee 1623 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1624 assert((IVSize == 32 || IVSize == 64) && 1625 "IV size is not compatible with the omp runtime"); 1626 StringRef Name = 1627 IVSize == 32 1628 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1629 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1630 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1631 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1632 llvm::Type *TypeParams[] = { 1633 getIdentTyPointerTy(), // loc 1634 CGM.Int32Ty, // tid 1635 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1636 PtrTy, // p_lower 1637 PtrTy, // p_upper 1638 PtrTy // p_stride 1639 }; 1640 auto *FnTy = 1641 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1642 return CGM.CreateRuntimeFunction(FnTy, Name); 1643 } 1644 1645 /// Obtain information that uniquely identifies a target entry. This 1646 /// consists of the file and device IDs as well as line number associated with 1647 /// the relevant entry source location. 1648 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1649 unsigned &DeviceID, unsigned &FileID, 1650 unsigned &LineNum) { 1651 SourceManager &SM = C.getSourceManager(); 1652 1653 // The loc should be always valid and have a file ID (the user cannot use 1654 // #pragma directives in macros) 1655 1656 assert(Loc.isValid() && "Source location is expected to be always valid."); 1657 1658 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1659 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1660 1661 llvm::sys::fs::UniqueID ID; 1662 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1663 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1664 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1665 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1666 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1667 << PLoc.getFilename() << EC.message(); 1668 } 1669 1670 DeviceID = ID.getDevice(); 1671 FileID = ID.getFile(); 1672 LineNum = PLoc.getLine(); 1673 } 1674 1675 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1676 if (CGM.getLangOpts().OpenMPSimd) 1677 return Address::invalid(); 1678 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1679 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1680 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1681 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1682 HasRequiresUnifiedSharedMemory))) { 1683 SmallString<64> PtrName; 1684 { 1685 llvm::raw_svector_ostream OS(PtrName); 1686 OS << CGM.getMangledName(GlobalDecl(VD)); 1687 if (!VD->isExternallyVisible()) { 1688 unsigned DeviceID, FileID, Line; 1689 getTargetEntryUniqueInfo(CGM.getContext(), 1690 VD->getCanonicalDecl()->getBeginLoc(), 1691 DeviceID, FileID, Line); 1692 OS << llvm::format("_%x", FileID); 1693 } 1694 OS << "_decl_tgt_ref_ptr"; 1695 } 1696 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1697 if (!Ptr) { 1698 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1699 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1700 PtrName); 1701 1702 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1703 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1704 1705 if (!CGM.getLangOpts().OpenMPIsDevice) 1706 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1707 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1708 } 1709 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1710 } 1711 return Address::invalid(); 1712 } 1713 1714 llvm::Constant * 1715 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1716 assert(!CGM.getLangOpts().OpenMPUseTLS || 1717 !CGM.getContext().getTargetInfo().isTLSSupported()); 1718 // Lookup the entry, lazily creating it if necessary. 
1719 std::string Suffix = getName({"cache", ""}); 1720 return getOrCreateInternalVariable( 1721 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1722 } 1723 1724 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1725 const VarDecl *VD, 1726 Address VDAddr, 1727 SourceLocation Loc) { 1728 if (CGM.getLangOpts().OpenMPUseTLS && 1729 CGM.getContext().getTargetInfo().isTLSSupported()) 1730 return VDAddr; 1731 1732 llvm::Type *VarTy = VDAddr.getElementType(); 1733 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1734 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1735 CGM.Int8PtrTy), 1736 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1737 getOrCreateThreadPrivateCache(VD)}; 1738 return Address(CGF.EmitRuntimeCall( 1739 OMPBuilder.getOrCreateRuntimeFunction( 1740 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1741 Args), 1742 VDAddr.getAlignment()); 1743 } 1744 1745 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1746 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1747 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1748 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1749 // library. 1750 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1751 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1752 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1753 OMPLoc); 1754 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1755 // to register constructor/destructor for variable. 
1756 llvm::Value *Args[] = { 1757 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1758 Ctor, CopyCtor, Dtor}; 1759 CGF.EmitRuntimeCall( 1760 OMPBuilder.getOrCreateRuntimeFunction( 1761 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1762 Args); 1763 } 1764 1765 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1766 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1767 bool PerformInit, CodeGenFunction *CGF) { 1768 if (CGM.getLangOpts().OpenMPUseTLS && 1769 CGM.getContext().getTargetInfo().isTLSSupported()) 1770 return nullptr; 1771 1772 VD = VD->getDefinition(CGM.getContext()); 1773 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1774 QualType ASTTy = VD->getType(); 1775 1776 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1777 const Expr *Init = VD->getAnyInitializer(); 1778 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1779 // Generate function that re-emits the declaration's initializer into the 1780 // threadprivate copy of the variable VD 1781 CodeGenFunction CtorCGF(CGM); 1782 FunctionArgList Args; 1783 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1784 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1785 ImplicitParamDecl::Other); 1786 Args.push_back(&Dst); 1787 1788 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1789 CGM.getContext().VoidPtrTy, Args); 1790 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1791 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1792 llvm::Function *Fn = 1793 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1794 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1795 Args, Loc, Loc); 1796 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1797 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1798 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1799 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1800 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1801 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1802 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1803 /*IsInitializer=*/true); 1804 ArgVal = CtorCGF.EmitLoadOfScalar( 1805 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1806 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1807 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1808 CtorCGF.FinishFunction(); 1809 Ctor = Fn; 1810 } 1811 if (VD->getType().isDestructedType() != QualType::DK_none) { 1812 // Generate function that emits destructor call for the threadprivate copy 1813 // of the variable VD 1814 CodeGenFunction DtorCGF(CGM); 1815 FunctionArgList Args; 1816 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1817 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1818 ImplicitParamDecl::Other); 1819 Args.push_back(&Dst); 1820 1821 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1822 CGM.getContext().VoidTy, Args); 1823 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1824 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1825 llvm::Function *Fn = 1826 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1827 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1828 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1829 Loc, Loc); 1830 // Create a scope with an artificial location for the body of this function. 1831 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1832 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1833 DtorCGF.GetAddrOfLocalVar(&Dst), 1834 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1835 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1836 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1837 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1838 DtorCGF.FinishFunction(); 1839 Dtor = Fn; 1840 } 1841 // Do not emit init function if it is not required. 
1842 if (!Ctor && !Dtor) 1843 return nullptr; 1844 1845 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1846 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1847 /*isVarArg=*/false) 1848 ->getPointerTo(); 1849 // Copying constructor for the threadprivate variable. 1850 // Must be NULL - reserved by runtime, but currently it requires that this 1851 // parameter is always NULL. Otherwise it fires assertion. 1852 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1853 if (Ctor == nullptr) { 1854 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1855 /*isVarArg=*/false) 1856 ->getPointerTo(); 1857 Ctor = llvm::Constant::getNullValue(CtorTy); 1858 } 1859 if (Dtor == nullptr) { 1860 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1861 /*isVarArg=*/false) 1862 ->getPointerTo(); 1863 Dtor = llvm::Constant::getNullValue(DtorTy); 1864 } 1865 if (!CGF) { 1866 auto *InitFunctionTy = 1867 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1868 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1869 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1870 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1871 CodeGenFunction InitCGF(CGM); 1872 FunctionArgList ArgList; 1873 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1874 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1875 Loc, Loc); 1876 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1877 InitCGF.FinishFunction(); 1878 return InitFunction; 1879 } 1880 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1881 } 1882 return nullptr; 1883 } 1884 1885 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1886 llvm::GlobalVariable *Addr, 1887 bool PerformInit) { 1888 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1889 !CGM.getLangOpts().OpenMPIsDevice) 1890 return false; 1891 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1892 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1893 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1894 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1895 HasRequiresUnifiedSharedMemory)) 1896 return CGM.getLangOpts().OpenMPIsDevice; 1897 VD = VD->getDefinition(CGM.getContext()); 1898 assert(VD && "Unknown VarDecl"); 1899 1900 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1901 return CGM.getLangOpts().OpenMPIsDevice; 1902 1903 QualType ASTTy = VD->getType(); 1904 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1905 1906 // Produce the unique prefix to identify the new target regions. We use 1907 // the source location of the variable declaration which we know to not 1908 // conflict with any target region. 1909 unsigned DeviceID; 1910 unsigned FileID; 1911 unsigned Line; 1912 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1913 SmallString<128> Buffer, Out; 1914 { 1915 llvm::raw_svector_ostream OS(Buffer); 1916 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1917 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1918 } 1919 1920 const Expr *Init = VD->getAnyInitializer(); 1921 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1922 llvm::Constant *Ctor; 1923 llvm::Constant *ID; 1924 if (CGM.getLangOpts().OpenMPIsDevice) { 1925 // Generate function that re-emits the declaration's initializer into 1926 // the threadprivate copy of the variable VD 1927 CodeGenFunction CtorCGF(CGM); 1928 1929 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1930 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1931 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1932 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1933 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1934 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1935 FunctionArgList(), Loc, Loc); 1936 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1937 CtorCGF.EmitAnyExprToMem(Init, 1938 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1939 Init->getType().getQualifiers(), 1940 /*IsInitializer=*/true); 1941 CtorCGF.FinishFunction(); 1942 Ctor = Fn; 1943 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1944 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1945 } else { 1946 Ctor = new llvm::GlobalVariable( 1947 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1948 llvm::GlobalValue::PrivateLinkage, 1949 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1950 ID = Ctor; 1951 } 1952 1953 // Register the information for the entry associated with the constructor. 1954 Out.clear(); 1955 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1956 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1957 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1958 } 1959 if (VD->getType().isDestructedType() != QualType::DK_none) { 1960 llvm::Constant *Dtor; 1961 llvm::Constant *ID; 1962 if (CGM.getLangOpts().OpenMPIsDevice) { 1963 // Generate function that emits destructor call for the threadprivate 1964 // copy of the variable VD 1965 CodeGenFunction DtorCGF(CGM); 1966 1967 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1968 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1969 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1970 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1971 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1972 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1973 FunctionArgList(), Loc, Loc); 1974 // Create a scope with an artificial location for the body of this 1975 // function. 
1976 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1977 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1978 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1979 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1980 DtorCGF.FinishFunction(); 1981 Dtor = Fn; 1982 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1983 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1984 } else { 1985 Dtor = new llvm::GlobalVariable( 1986 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1987 llvm::GlobalValue::PrivateLinkage, 1988 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1989 ID = Dtor; 1990 } 1991 // Register the information for the entry associated with the destructor. 1992 Out.clear(); 1993 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1994 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1995 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1996 } 1997 return CGM.getLangOpts().OpenMPIsDevice; 1998 } 1999 2000 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2001 QualType VarType, 2002 StringRef Name) { 2003 std::string Suffix = getName({"artificial", ""}); 2004 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2005 llvm::Value *GAddr = 2006 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2007 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2008 CGM.getTarget().isTLSSupported()) { 2009 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2010 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2011 } 2012 std::string CacheSuffix = getName({"cache", ""}); 2013 llvm::Value *Args[] = { 2014 emitUpdateLocation(CGF, SourceLocation()), 2015 getThreadID(CGF, SourceLocation()), 2016 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2017 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2018 /*isSigned=*/false), 2019 
getOrCreateInternalVariable( 2020 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2021 return Address( 2022 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2023 CGF.EmitRuntimeCall( 2024 OMPBuilder.getOrCreateRuntimeFunction( 2025 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2026 Args), 2027 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2028 CGM.getContext().getTypeAlignInChars(VarType)); 2029 } 2030 2031 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2032 const RegionCodeGenTy &ThenGen, 2033 const RegionCodeGenTy &ElseGen) { 2034 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2035 2036 // If the condition constant folds and can be elided, try to avoid emitting 2037 // the condition and the dead arm of the if/else. 2038 bool CondConstant; 2039 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2040 if (CondConstant) 2041 ThenGen(CGF); 2042 else 2043 ElseGen(CGF); 2044 return; 2045 } 2046 2047 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2048 // emit the conditional branch. 2049 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2050 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2051 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2052 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2053 2054 // Emit the 'then' code. 2055 CGF.EmitBlock(ThenBlock); 2056 ThenGen(CGF); 2057 CGF.EmitBranch(ContBlock); 2058 // Emit the 'else' code if present. 2059 // There is no need to emit line number for unconditional branch. 2060 (void)ApplyDebugLocation::CreateEmpty(CGF); 2061 CGF.EmitBlock(ElseBlock); 2062 ElseGen(CGF); 2063 // There is no need to emit line number for unconditional branch. 2064 (void)ApplyDebugLocation::CreateEmpty(CGF); 2065 CGF.EmitBranch(ContBlock); 2066 // Emit the continuation block for code after the if. 
2067 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2068 } 2069 2070 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2071 llvm::Function *OutlinedFn, 2072 ArrayRef<llvm::Value *> CapturedVars, 2073 const Expr *IfCond) { 2074 if (!CGF.HaveInsertPoint()) 2075 return; 2076 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2077 auto &M = CGM.getModule(); 2078 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2079 this](CodeGenFunction &CGF, PrePostActionTy &) { 2080 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2082 llvm::Value *Args[] = { 2083 RTLoc, 2084 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2085 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2086 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2087 RealArgs.append(std::begin(Args), std::end(Args)); 2088 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2089 2090 llvm::FunctionCallee RTLFn = 2091 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2092 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2093 }; 2094 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2095 this](CodeGenFunction &CGF, PrePostActionTy &) { 2096 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2097 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2098 // Build calls: 2099 // __kmpc_serialized_parallel(&Loc, GTid); 2100 llvm::Value *Args[] = {RTLoc, ThreadID}; 2101 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2102 M, OMPRTL___kmpc_serialized_parallel), 2103 Args); 2104 2105 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2106 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2107 Address ZeroAddrBound = 2108 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2109 /*Name=*/".bound.zero.addr"); 2110 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2111 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2112 // ThreadId 
for serialized parallels is 0. 2113 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2114 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2115 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2116 2117 // Ensure we do not inline the function. This is trivially true for the ones 2118 // passed to __kmpc_fork_call but the ones calles in serialized regions 2119 // could be inlined. This is not a perfect but it is closer to the invariant 2120 // we want, namely, every data environment starts with a new function. 2121 // TODO: We should pass the if condition to the runtime function and do the 2122 // handling there. Much cleaner code. 2123 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2124 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2125 2126 // __kmpc_end_serialized_parallel(&Loc, GTid); 2127 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2128 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2129 M, OMPRTL___kmpc_end_serialized_parallel), 2130 EndArgs); 2131 }; 2132 if (IfCond) { 2133 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2134 } else { 2135 RegionCodeGenTy ThenRCG(ThenGen); 2136 ThenRCG(CGF); 2137 } 2138 } 2139 2140 // If we're inside an (outlined) parallel region, use the region info's 2141 // thread-ID variable (it is passed in a first argument of the outlined function 2142 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2143 // regular serial code region, get thread ID by calling kmp_int32 2144 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2145 // return the address of that temp. 
2146 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2147 SourceLocation Loc) { 2148 if (auto *OMPRegionInfo = 2149 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2150 if (OMPRegionInfo->getThreadIDVariable()) 2151 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2152 2153 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2154 QualType Int32Ty = 2155 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2156 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2157 CGF.EmitStoreOfScalar(ThreadID, 2158 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2159 2160 return ThreadIDTemp; 2161 } 2162 2163 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2164 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2165 SmallString<256> Buffer; 2166 llvm::raw_svector_ostream Out(Buffer); 2167 Out << Name; 2168 StringRef RuntimeName = Out.str(); 2169 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2170 if (Elem.second) { 2171 assert(Elem.second->getType()->getPointerElementType() == Ty && 2172 "OMP internal variable has different type than requested"); 2173 return &*Elem.second; 2174 } 2175 2176 return Elem.second = new llvm::GlobalVariable( 2177 CGM.getModule(), Ty, /*IsConstant*/ false, 2178 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2179 Elem.first(), /*InsertBefore=*/nullptr, 2180 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2181 } 2182 2183 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2184 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2185 std::string Name = getName({Prefix, "var"}); 2186 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2187 } 2188 2189 namespace { 2190 /// Common pre(post)-action for different OpenMP constructs. 
2191 class CommonActionTy final : public PrePostActionTy { 2192 llvm::FunctionCallee EnterCallee; 2193 ArrayRef<llvm::Value *> EnterArgs; 2194 llvm::FunctionCallee ExitCallee; 2195 ArrayRef<llvm::Value *> ExitArgs; 2196 bool Conditional; 2197 llvm::BasicBlock *ContBlock = nullptr; 2198 2199 public: 2200 CommonActionTy(llvm::FunctionCallee EnterCallee, 2201 ArrayRef<llvm::Value *> EnterArgs, 2202 llvm::FunctionCallee ExitCallee, 2203 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2204 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2205 ExitArgs(ExitArgs), Conditional(Conditional) {} 2206 void Enter(CodeGenFunction &CGF) override { 2207 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2208 if (Conditional) { 2209 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2210 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2211 ContBlock = CGF.createBasicBlock("omp_if.end"); 2212 // Generate the branch (If-stmt) 2213 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2214 CGF.EmitBlock(ThenBlock); 2215 } 2216 } 2217 void Done(CodeGenFunction &CGF) { 2218 // Emit the rest of blocks/branches 2219 CGF.EmitBranch(ContBlock); 2220 CGF.EmitBlock(ContBlock, true); 2221 } 2222 void Exit(CodeGenFunction &CGF) override { 2223 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2224 } 2225 }; 2226 } // anonymous namespace 2227 2228 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2229 StringRef CriticalName, 2230 const RegionCodeGenTy &CriticalOpGen, 2231 SourceLocation Loc, const Expr *Hint) { 2232 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2233 // CriticalOpGen(); 2234 // __kmpc_end_critical(ident_t *, gtid, Lock); 2235 // Prepare arguments and build a call to __kmpc_critical 2236 if (!CGF.HaveInsertPoint()) 2237 return; 2238 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2239 getCriticalRegionLock(CriticalName)}; 2240 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2241 std::end(Args)); 2242 if (Hint) { 2243 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2244 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2245 } 2246 CommonActionTy Action( 2247 OMPBuilder.getOrCreateRuntimeFunction( 2248 CGM.getModule(), 2249 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2250 EnterArgs, 2251 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2252 OMPRTL___kmpc_end_critical), 2253 Args); 2254 CriticalOpGen.setAction(Action); 2255 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2256 } 2257 2258 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2259 const RegionCodeGenTy &MasterOpGen, 2260 SourceLocation Loc) { 2261 if (!CGF.HaveInsertPoint()) 2262 return; 2263 // if(__kmpc_master(ident_t *, gtid)) { 2264 // MasterOpGen(); 2265 // __kmpc_end_master(ident_t *, gtid); 2266 // } 2267 // Prepare arguments and build a call to __kmpc_master 2268 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2269 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2270 CGM.getModule(), OMPRTL___kmpc_master), 2271 Args, 2272 OMPBuilder.getOrCreateRuntimeFunction( 2273 CGM.getModule(), OMPRTL___kmpc_end_master), 2274 Args, 2275 /*Conditional=*/true); 2276 MasterOpGen.setAction(Action); 2277 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2278 Action.Done(CGF); 2279 } 2280 2281 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2282 const RegionCodeGenTy &MaskedOpGen, 2283 SourceLocation Loc, const Expr *Filter) { 2284 if (!CGF.HaveInsertPoint()) 2285 return; 2286 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2287 // MaskedOpGen(); 2288 // __kmpc_end_masked(iden_t *, gtid); 2289 // } 2290 // Prepare arguments and build a call to __kmpc_masked 2291 llvm::Value *FilterVal = Filter 2292 ? 
CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2293 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2294 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2295 FilterVal}; 2296 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2297 getThreadID(CGF, Loc)}; 2298 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2299 CGM.getModule(), OMPRTL___kmpc_masked), 2300 Args, 2301 OMPBuilder.getOrCreateRuntimeFunction( 2302 CGM.getModule(), OMPRTL___kmpc_end_masked), 2303 ArgsEnd, 2304 /*Conditional=*/true); 2305 MaskedOpGen.setAction(Action); 2306 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2307 Action.Done(CGF); 2308 } 2309 2310 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2311 SourceLocation Loc) { 2312 if (!CGF.HaveInsertPoint()) 2313 return; 2314 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2315 OMPBuilder.createTaskyield(CGF.Builder); 2316 } else { 2317 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2318 llvm::Value *Args[] = { 2319 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2320 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2321 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2322 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2323 Args); 2324 } 2325 2326 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2327 Region->emitUntiedSwitch(CGF); 2328 } 2329 2330 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2331 const RegionCodeGenTy &TaskgroupOpGen, 2332 SourceLocation Loc) { 2333 if (!CGF.HaveInsertPoint()) 2334 return; 2335 // __kmpc_taskgroup(ident_t *, gtid); 2336 // TaskgroupOpGen(); 2337 // __kmpc_end_taskgroup(ident_t *, gtid); 2338 // Prepare arguments and build a call to __kmpc_taskgroup 2339 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2340 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2341 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2342 Args, 2343 
OMPBuilder.getOrCreateRuntimeFunction( 2344 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2345 Args); 2346 TaskgroupOpGen.setAction(Action); 2347 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2348 } 2349 2350 /// Given an array of pointers to variables, project the address of a 2351 /// given variable. 2352 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2353 unsigned Index, const VarDecl *Var) { 2354 // Pull out the pointer to the variable. 2355 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2356 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2357 2358 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2359 Addr = CGF.Builder.CreateElementBitCast( 2360 Addr, CGF.ConvertTypeForMem(Var->getType())); 2361 return Addr; 2362 } 2363 2364 static llvm::Value *emitCopyprivateCopyFunction( 2365 CodeGenModule &CGM, llvm::Type *ArgsType, 2366 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2367 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2368 SourceLocation Loc) { 2369 ASTContext &C = CGM.getContext(); 2370 // void copy_func(void *LHSArg, void *RHSArg); 2371 FunctionArgList Args; 2372 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2373 ImplicitParamDecl::Other); 2374 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2375 ImplicitParamDecl::Other); 2376 Args.push_back(&LHSArg); 2377 Args.push_back(&RHSArg); 2378 const auto &CGFI = 2379 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2380 std::string Name = 2381 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2382 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2383 llvm::GlobalValue::InternalLinkage, Name, 2384 &CGM.getModule()); 2385 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2386 Fn->setDoesNotRecurse(); 2387 CodeGenFunction CGF(CGM); 2388 
/// Emit code for a 'single' region, including optional copyprivate handling.
///
/// Does nothing when \p CGF has no insertion point. The region body is emitted
/// as an inlined OMPD_single directive wrapped in a conditional
/// __kmpc_single / __kmpc_end_single action. When \p CopyprivateVars is
/// non-empty, a 'did_it' flag, a list of pointers to the copyprivate
/// variables and a generated copy function are passed to __kmpc_copyprivate.
///
/// \param CGF             Function being emitted.
/// \param SingleOpGen     Generator for the body of the region.
/// \param Loc             Source location used for the runtime calls.
/// \param CopyprivateVars Expressions naming the copyprivate variables.
/// \param SrcExprs        Per-variable expressions; must have the same size
///                        as \p CopyprivateVars.
/// \param DstExprs        Per-variable expressions; must have the same size
///                        as \p CopyprivateVars.
/// \param AssignmentOps   Per-variable copy operations; must have the same
///                        size as \p CopyprivateVars.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four lists are indexed in parallel, one element per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is what
  // later gates every copyprivate-specific piece of code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  // The same (loc, gtid) pair is handed to both the enter and the exit call.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), matching the pseudo-code above where the
    // store sits inside the 'if'.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // NOTE(review): presumably closes the conditional region opened by the
  // action - confirm against CommonActionTy::Done.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The list is a constant-size array of void* with one slot per variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Store the address of the I-th variable, cast to void*, into slot I.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): emitCopyprivateCopyFunction declares its parameters as
    // (CopyprivateVars, DestExprs, SrcExprs, AssignmentOps), so SrcExprs lands
    // in the callee's DestExprs slot and DstExprs in its SrcExprs slot.
    // Confirm against the callers of emitSingleRegion that this is intended.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                 // void (*) (void *, void *) <copy_func>
        DidItVal               // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
OMPRTL___kmpc_ordered), 2519 Args, 2520 OMPBuilder.getOrCreateRuntimeFunction( 2521 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2522 Args); 2523 OrderedOpGen.setAction(Action); 2524 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2525 return; 2526 } 2527 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2528 } 2529 2530 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2531 unsigned Flags; 2532 if (Kind == OMPD_for) 2533 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2534 else if (Kind == OMPD_sections) 2535 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2536 else if (Kind == OMPD_single) 2537 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2538 else if (Kind == OMPD_barrier) 2539 Flags = OMP_IDENT_BARRIER_EXPL; 2540 else 2541 Flags = OMP_IDENT_BARRIER_IMPL; 2542 return Flags; 2543 } 2544 2545 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2546 CodeGenFunction &CGF, const OMPLoopDirective &S, 2547 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2548 // Check if the loop directive is actually a doacross loop directive. In this 2549 // case choose static, 1 schedule. 2550 if (llvm::any_of( 2551 S.getClausesOfKind<OMPOrderedClause>(), 2552 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2553 ScheduleKind = OMPC_SCHEDULE_static; 2554 // Chunk size is 1 in this case. 
/// Emit a barrier for the directive \p Kind.
///
/// When the OpenMPIRBuilder language option is set, the barrier is created by
/// OMPBuilder.createBarrier and nothing else is emitted. Otherwise the
/// function emits either __kmpc_cancel_barrier (inside an OpenMP region that
/// has cancel, unless \p ForceSimpleCall is set) or __kmpc_barrier.
///
/// \param CGF             Function being emitted.
/// \param Loc             Source location for the runtime call.
/// \param Kind            Directive kind; selects the ident_t barrier flags.
/// \param EmitChecks      If true, the result of __kmpc_cancel_barrier is
///                        tested and a branch to the cancel destination is
///                        emitted for a non-null result.
/// \param ForceSimpleCall If true, always use __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  // The region info is looked up first but is only consulted on the
  // non-OMPBuilder path below.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  // Note: the insertion-point check only guards the non-OMPBuilder path.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        // A non-null result takes the exit block.
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier; do not emit both.
      return;
    }
  }
  // No cancellable region (or a simple call was forced): plain barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2640 } 2641 2642 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2643 bool Chunked) const { 2644 OpenMPSchedType Schedule = 2645 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2646 return Schedule == OMP_sch_static; 2647 } 2648 2649 bool CGOpenMPRuntime::isStaticNonchunked( 2650 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2651 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2652 return Schedule == OMP_dist_sch_static; 2653 } 2654 2655 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2656 bool Chunked) const { 2657 OpenMPSchedType Schedule = 2658 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2659 return Schedule == OMP_sch_static_chunked; 2660 } 2661 2662 bool CGOpenMPRuntime::isStaticChunked( 2663 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2664 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2665 return Schedule == OMP_dist_sch_static_chunked; 2666 } 2667 2668 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2669 OpenMPSchedType Schedule = 2670 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2671 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2672 return Schedule != OMP_sch_static; 2673 } 2674 2675 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2676 OpenMPScheduleClauseModifier M1, 2677 OpenMPScheduleClauseModifier M2) { 2678 int Modifier = 0; 2679 switch (M1) { 2680 case OMPC_SCHEDULE_MODIFIER_monotonic: 2681 Modifier = OMP_sch_modifier_monotonic; 2682 break; 2683 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2684 Modifier = OMP_sch_modifier_nonmonotonic; 2685 break; 2686 case OMPC_SCHEDULE_MODIFIER_simd: 2687 if (Schedule == OMP_sch_static_chunked) 2688 Schedule = OMP_sch_static_balanced_chunked; 2689 break; 2690 case 
OMPC_SCHEDULE_MODIFIER_last: 2691 case OMPC_SCHEDULE_MODIFIER_unknown: 2692 break; 2693 } 2694 switch (M2) { 2695 case OMPC_SCHEDULE_MODIFIER_monotonic: 2696 Modifier = OMP_sch_modifier_monotonic; 2697 break; 2698 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2699 Modifier = OMP_sch_modifier_nonmonotonic; 2700 break; 2701 case OMPC_SCHEDULE_MODIFIER_simd: 2702 if (Schedule == OMP_sch_static_chunked) 2703 Schedule = OMP_sch_static_balanced_chunked; 2704 break; 2705 case OMPC_SCHEDULE_MODIFIER_last: 2706 case OMPC_SCHEDULE_MODIFIER_unknown: 2707 break; 2708 } 2709 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2710 // If the static schedule kind is specified or if the ordered clause is 2711 // specified, and if the nonmonotonic modifier is not specified, the effect is 2712 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2713 // modifier is specified, the effect is as if the nonmonotonic modifier is 2714 // specified. 2715 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2716 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2717 Schedule == OMP_sch_static_balanced_chunked || 2718 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2719 Schedule == OMP_dist_sch_static_chunked || 2720 Schedule == OMP_dist_sch_static)) 2721 Modifier = OMP_sch_modifier_nonmonotonic; 2722 } 2723 return Schedule | Modifier; 2724 } 2725 2726 void CGOpenMPRuntime::emitForDispatchInit( 2727 CodeGenFunction &CGF, SourceLocation Loc, 2728 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2729 bool Ordered, const DispatchRTInput &DispatchValues) { 2730 if (!CGF.HaveInsertPoint()) 2731 return; 2732 OpenMPSchedType Schedule = getRuntimeSchedule( 2733 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2734 assert(Ordered || 2735 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2736 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2737 
Schedule != OMP_sch_static_balanced_chunked)); 2738 // Call __kmpc_dispatch_init( 2739 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2740 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2741 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2742 2743 // If the Chunk was not specified in the clause - use default value 1. 2744 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2745 : CGF.Builder.getIntN(IVSize, 1); 2746 llvm::Value *Args[] = { 2747 emitUpdateLocation(CGF, Loc), 2748 getThreadID(CGF, Loc), 2749 CGF.Builder.getInt32(addMonoNonMonoModifier( 2750 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2751 DispatchValues.LB, // Lower 2752 DispatchValues.UB, // Upper 2753 CGF.Builder.getIntN(IVSize, 1), // Stride 2754 Chunk // Chunk 2755 }; 2756 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2757 } 2758 2759 static void emitForStaticInitCall( 2760 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2761 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2762 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2763 const CGOpenMPRuntime::StaticRTInput &Values) { 2764 if (!CGF.HaveInsertPoint()) 2765 return; 2766 2767 assert(!Values.Ordered); 2768 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2769 Schedule == OMP_sch_static_balanced_chunked || 2770 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2771 Schedule == OMP_dist_sch_static || 2772 Schedule == OMP_dist_sch_static_chunked); 2773 2774 // Call __kmpc_for_static_init( 2775 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2776 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2777 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2778 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2779 llvm::Value *Chunk = Values.Chunk; 2780 if (Chunk == nullptr) { 2781 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2782 Schedule == 
OMP_dist_sch_static) && 2783 "expected static non-chunked schedule"); 2784 // If the Chunk was not specified in the clause - use default value 1. 2785 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2786 } else { 2787 assert((Schedule == OMP_sch_static_chunked || 2788 Schedule == OMP_sch_static_balanced_chunked || 2789 Schedule == OMP_ord_static_chunked || 2790 Schedule == OMP_dist_sch_static_chunked) && 2791 "expected static chunked schedule"); 2792 } 2793 llvm::Value *Args[] = { 2794 UpdateLocation, 2795 ThreadId, 2796 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2797 M2)), // Schedule type 2798 Values.IL.getPointer(), // &isLastIter 2799 Values.LB.getPointer(), // &LB 2800 Values.UB.getPointer(), // &UB 2801 Values.ST.getPointer(), // &Stride 2802 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2803 Chunk // Chunk 2804 }; 2805 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2806 } 2807 2808 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2809 SourceLocation Loc, 2810 OpenMPDirectiveKind DKind, 2811 const OpenMPScheduleTy &ScheduleKind, 2812 const StaticRTInput &Values) { 2813 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2814 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2815 assert(isOpenMPWorksharingDirective(DKind) && 2816 "Expected loop-based or sections-based directive."); 2817 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2818 isOpenMPLoopDirective(DKind) 2819 ? 
OMP_IDENT_WORK_LOOP 2820 : OMP_IDENT_WORK_SECTIONS); 2821 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2822 llvm::FunctionCallee StaticInitFunction = 2823 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2824 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2825 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2826 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2827 } 2828 2829 void CGOpenMPRuntime::emitDistributeStaticInit( 2830 CodeGenFunction &CGF, SourceLocation Loc, 2831 OpenMPDistScheduleClauseKind SchedKind, 2832 const CGOpenMPRuntime::StaticRTInput &Values) { 2833 OpenMPSchedType ScheduleNum = 2834 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2835 llvm::Value *UpdatedLocation = 2836 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2837 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2838 llvm::FunctionCallee StaticInitFunction = 2839 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2840 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2841 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2842 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2843 } 2844 2845 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2846 SourceLocation Loc, 2847 OpenMPDirectiveKind DKind) { 2848 if (!CGF.HaveInsertPoint()) 2849 return; 2850 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2851 llvm::Value *Args[] = { 2852 emitUpdateLocation(CGF, Loc, 2853 isOpenMPDistributeDirective(DKind) 2854 ? OMP_IDENT_WORK_DISTRIBUTE 2855 : isOpenMPLoopDirective(DKind) 2856 ? 
OMP_IDENT_WORK_LOOP 2857 : OMP_IDENT_WORK_SECTIONS), 2858 getThreadID(CGF, Loc)}; 2859 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2860 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2861 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2862 Args); 2863 } 2864 2865 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2866 SourceLocation Loc, 2867 unsigned IVSize, 2868 bool IVSigned) { 2869 if (!CGF.HaveInsertPoint()) 2870 return; 2871 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2872 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2873 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2874 } 2875 2876 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2877 SourceLocation Loc, unsigned IVSize, 2878 bool IVSigned, Address IL, 2879 Address LB, Address UB, 2880 Address ST) { 2881 // Call __kmpc_dispatch_next( 2882 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2883 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2884 // kmp_int[32|64] *p_stride); 2885 llvm::Value *Args[] = { 2886 emitUpdateLocation(CGF, Loc), 2887 getThreadID(CGF, Loc), 2888 IL.getPointer(), // &isLastIter 2889 LB.getPointer(), // &Lower 2890 UB.getPointer(), // &Upper 2891 ST.getPointer() // &Stride 2892 }; 2893 llvm::Value *Call = 2894 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2895 return CGF.EmitScalarConversion( 2896 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2897 CGF.getContext().BoolTy, Loc); 2898 } 2899 2900 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2901 llvm::Value *NumThreads, 2902 SourceLocation Loc) { 2903 if (!CGF.HaveInsertPoint()) 2904 return; 2905 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2906 llvm::Value *Args[] = { 2907 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2908 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2910 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2911 Args); 2912 } 2913 2914 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2915 ProcBindKind ProcBind, 2916 SourceLocation Loc) { 2917 if (!CGF.HaveInsertPoint()) 2918 return; 2919 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2920 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2921 llvm::Value *Args[] = { 2922 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2923 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2924 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2925 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2926 Args); 2927 } 2928 2929 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2930 SourceLocation Loc, llvm::AtomicOrdering AO) { 2931 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2932 OMPBuilder.createFlush(CGF.Builder); 2933 } else { 2934 if (!CGF.HaveInsertPoint()) 2935 return; 2936 // Build call void __kmpc_flush(ident_t *loc) 2937 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2938 CGM.getModule(), OMPRTL___kmpc_flush), 2939 emitUpdateLocation(CGF, Loc)); 2940 } 2941 } 2942 2943 namespace { 2944 /// Indexes of fields for type kmp_task_t. 2945 enum KmpTaskTFields { 2946 /// List of shared variables. 2947 KmpTaskTShareds, 2948 /// Task routine. 2949 KmpTaskTRoutine, 2950 /// Partition id for the untied tasks. 2951 KmpTaskTPartId, 2952 /// Function with call of destructors for private variables. 2953 Data1, 2954 /// Task priority. 2955 Data2, 2956 /// (Taskloops only) Lower bound. 2957 KmpTaskTLowerBound, 2958 /// (Taskloops only) Upper bound. 2959 KmpTaskTUpperBound, 2960 /// (Taskloops only) Stride. 2961 KmpTaskTStride, 2962 /// (Taskloops only) Is last iteration flag. 2963 KmpTaskTLastIter, 2964 /// (Taskloops only) Reduction data. 
2965 KmpTaskTReductions, 2966 }; 2967 } // anonymous namespace 2968 2969 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2970 return OffloadEntriesTargetRegion.empty() && 2971 OffloadEntriesDeviceGlobalVar.empty(); 2972 } 2973 2974 /// Initialize target region entry. 2975 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2976 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2977 StringRef ParentName, unsigned LineNum, 2978 unsigned Order) { 2979 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2980 "only required for the device " 2981 "code generation."); 2982 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2983 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2984 OMPTargetRegionEntryTargetRegion); 2985 ++OffloadingEntriesNum; 2986 } 2987 2988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2989 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2990 StringRef ParentName, unsigned LineNum, 2991 llvm::Constant *Addr, llvm::Constant *ID, 2992 OMPTargetRegionEntryKind Flags) { 2993 // If we are emitting code for a target, the entry is already initialized, 2994 // only has to be registered. 2995 if (CGM.getLangOpts().OpenMPIsDevice) { 2996 // This could happen if the device compilation is invoked standalone. 
2997 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2998 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2999 OffloadingEntriesNum); 3000 auto &Entry = 3001 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3002 Entry.setAddress(Addr); 3003 Entry.setID(ID); 3004 Entry.setFlags(Flags); 3005 } else { 3006 if (Flags == 3007 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3008 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3009 /*IgnoreAddressId*/ true)) 3010 return; 3011 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3012 "Target region entry already registered!"); 3013 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3014 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3015 ++OffloadingEntriesNum; 3016 } 3017 } 3018 3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3020 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3021 bool IgnoreAddressId) const { 3022 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3023 if (PerDevice == OffloadEntriesTargetRegion.end()) 3024 return false; 3025 auto PerFile = PerDevice->second.find(FileID); 3026 if (PerFile == PerDevice->second.end()) 3027 return false; 3028 auto PerParentName = PerFile->second.find(ParentName); 3029 if (PerParentName == PerFile->second.end()) 3030 return false; 3031 auto PerLine = PerParentName->second.find(LineNum); 3032 if (PerLine == PerParentName->second.end()) 3033 return false; 3034 // Fail if this entry is already registered. 
3035 if (!IgnoreAddressId && 3036 (PerLine->second.getAddress() || PerLine->second.getID())) 3037 return false; 3038 return true; 3039 } 3040 3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3042 const OffloadTargetRegionEntryInfoActTy &Action) { 3043 // Scan all target region entries and perform the provided action. 3044 for (const auto &D : OffloadEntriesTargetRegion) 3045 for (const auto &F : D.second) 3046 for (const auto &P : F.second) 3047 for (const auto &L : P.second) 3048 Action(D.first, F.first, P.first(), L.first, L.second); 3049 } 3050 3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3052 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3053 OMPTargetGlobalVarEntryKind Flags, 3054 unsigned Order) { 3055 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3056 "only required for the device " 3057 "code generation."); 3058 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3059 ++OffloadingEntriesNum; 3060 } 3061 3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3063 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3064 CharUnits VarSize, 3065 OMPTargetGlobalVarEntryKind Flags, 3066 llvm::GlobalValue::LinkageTypes Linkage) { 3067 if (CGM.getLangOpts().OpenMPIsDevice) { 3068 // This could happen if the device compilation is invoked standalone. 
3069 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3070 initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); 3071 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3072 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3073 "Resetting with the new address."); 3074 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3075 if (Entry.getVarSize().isZero()) { 3076 Entry.setVarSize(VarSize); 3077 Entry.setLinkage(Linkage); 3078 } 3079 return; 3080 } 3081 Entry.setVarSize(VarSize); 3082 Entry.setLinkage(Linkage); 3083 Entry.setAddress(Addr); 3084 } else { 3085 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3086 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3087 assert(Entry.isValid() && Entry.getFlags() == Flags && 3088 "Entry not initialized!"); 3089 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3090 "Resetting with the new address."); 3091 if (Entry.getVarSize().isZero()) { 3092 Entry.setVarSize(VarSize); 3093 Entry.setLinkage(Linkage); 3094 } 3095 return; 3096 } 3097 OffloadEntriesDeviceGlobalVar.try_emplace( 3098 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3099 ++OffloadingEntriesNum; 3100 } 3101 } 3102 3103 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3104 actOnDeviceGlobalVarEntriesInfo( 3105 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3106 // Scan all target region entries and perform the provided action. 3107 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3108 Action(E.getKey(), E.getValue()); 3109 } 3110 3111 void CGOpenMPRuntime::createOffloadEntry( 3112 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3113 llvm::GlobalValue::LinkageTypes Linkage) { 3114 StringRef Name = Addr->getName(); 3115 llvm::Module &M = CGM.getModule(); 3116 llvm::LLVMContext &C = M.getContext(); 3117 3118 // Create constant string with the name. 
/// Emit the "omp_offload.info" named metadata and the offload entries.
///
/// Returns immediately in simd mode or when the entry manager is empty.
/// Otherwise it (1) adds one metadata node per target region entry and per
/// device global variable entry, (2) collects all entries into an array
/// indexed by their creation order, and (3) walks that array, calling
/// createOffloadEntry for each usable entry and reporting an error diagnostic
/// for entries whose address (or ID) is missing.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are pre-sized to the manager's size and are indexed by
  // getOrder().
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Look for a known file whose unique ID matches (DeviceID, FileID) to
        // build a source location for diagnostics; Loc stays default
        // constructed when no file matches.
        // Note: inside this loop 'E' names the end iterator and shadows the
        // entry parameter 'E'; the parameter is visible again after the loop.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries carry no source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entries in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target region entries are created with size 0 and weak linkage.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device with unified shared memory no entry is emitted.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be unset on the device and set on the
        // host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          // NOTE(review): unlike the 'to' case above, this diagnostic is
          // reported without a source location or the variable name.
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address is used as both the ID and the
      // address of the entry.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3321 3322 if (!CGM.getLangOpts().OpenMPIsDevice) 3323 return; 3324 3325 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3326 return; 3327 3328 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3329 if (auto EC = Buf.getError()) { 3330 CGM.getDiags().Report(diag::err_cannot_open_file) 3331 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3332 return; 3333 } 3334 3335 llvm::LLVMContext C; 3336 auto ME = expectedToErrorOrAndEmitErrors( 3337 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3338 3339 if (auto EC = ME.getError()) { 3340 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3341 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3342 CGM.getDiags().Report(DiagID) 3343 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3344 return; 3345 } 3346 3347 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3348 if (!MD) 3349 return; 3350 3351 for (llvm::MDNode *MN : MD->operands()) { 3352 auto &&GetMDInt = [MN](unsigned Idx) { 3353 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3354 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3355 }; 3356 3357 auto &&GetMDString = [MN](unsigned Idx) { 3358 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3359 return V->getString(); 3360 }; 3361 3362 switch (GetMDInt(0)) { 3363 default: 3364 llvm_unreachable("Unexpected metadata!"); 3365 break; 3366 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3367 OffloadingEntryInfoTargetRegion: 3368 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3369 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3370 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3371 /*Order=*/GetMDInt(5)); 3372 break; 3373 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3374 OffloadingEntryInfoDeviceGlobalVar: 3375 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3376 /*MangledName=*/GetMDString(1), 3377 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3378 /*Flags=*/GetMDInt(2)), 3379 /*Order=*/GetMDInt(3)); 3380 break; 3381 } 3382 } 3383 } 3384 3385 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3386 if (!KmpRoutineEntryPtrTy) { 3387 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3388 ASTContext &C = CGM.getContext(); 3389 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3390 FunctionProtoType::ExtProtoInfo EPI; 3391 KmpRoutineEntryPtrQTy = C.getPointerType( 3392 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3393 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3394 } 3395 } 3396 3397 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3398 // Make sure the type of the entry is already created. This is the type we 3399 // have to create: 3400 // struct __tgt_offload_entry{ 3401 // void *addr; // Pointer to the offload entry info. 3402 // // (function or global) 3403 // char *name; // Name of the function or global. 3404 // size_t size; // Size of the entry info (0 if it a function). 3405 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3406 // int32_t reserved; // Reserved, to use by the runtime library. 
3407 // }; 3408 if (TgtOffloadEntryQTy.isNull()) { 3409 ASTContext &C = CGM.getContext(); 3410 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3411 RD->startDefinition(); 3412 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3413 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3414 addFieldToRecordDecl(C, RD, C.getSizeType()); 3415 addFieldToRecordDecl( 3416 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3417 addFieldToRecordDecl( 3418 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3419 RD->completeDefinition(); 3420 RD->addAttr(PackedAttr::CreateImplicit(C)); 3421 TgtOffloadEntryQTy = C.getRecordType(RD); 3422 } 3423 return TgtOffloadEntryQTy; 3424 } 3425 3426 namespace { 3427 struct PrivateHelpersTy { 3428 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3429 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3430 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3431 PrivateElemInit(PrivateElemInit) {} 3432 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3433 const Expr *OriginalRef = nullptr; 3434 const VarDecl *Original = nullptr; 3435 const VarDecl *PrivateCopy = nullptr; 3436 const VarDecl *PrivateElemInit = nullptr; 3437 bool isLocalPrivate() const { 3438 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3439 } 3440 }; 3441 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3442 } // anonymous namespace 3443 3444 static bool isAllocatableDecl(const VarDecl *VD) { 3445 const VarDecl *CVD = VD->getCanonicalDecl(); 3446 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3447 return false; 3448 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3449 // Use the default allocation. 
3450 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3451 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3452 !AA->getAllocator()); 3453 } 3454 3455 static RecordDecl * 3456 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3457 if (!Privates.empty()) { 3458 ASTContext &C = CGM.getContext(); 3459 // Build struct .kmp_privates_t. { 3460 // /* private vars */ 3461 // }; 3462 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3463 RD->startDefinition(); 3464 for (const auto &Pair : Privates) { 3465 const VarDecl *VD = Pair.second.Original; 3466 QualType Type = VD->getType().getNonReferenceType(); 3467 // If the private variable is a local variable with lvalue ref type, 3468 // allocate the pointer instead of the pointee type. 3469 if (Pair.second.isLocalPrivate()) { 3470 if (VD->getType()->isLValueReferenceType()) 3471 Type = C.getPointerType(Type); 3472 if (isAllocatableDecl(VD)) 3473 Type = C.getPointerType(Type); 3474 } 3475 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3476 if (VD->hasAttrs()) { 3477 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3478 E(VD->getAttrs().end()); 3479 I != E; ++I) 3480 FD->addAttr(*I); 3481 } 3482 } 3483 RD->completeDefinition(); 3484 return RD; 3485 } 3486 return nullptr; 3487 } 3488 3489 static RecordDecl * 3490 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3491 QualType KmpInt32Ty, 3492 QualType KmpRoutineEntryPointerQTy) { 3493 ASTContext &C = CGM.getContext(); 3494 // Build struct kmp_task_t { 3495 // void * shareds; 3496 // kmp_routine_entry_t routine; 3497 // kmp_int32 part_id; 3498 // kmp_cmplrdata_t data1; 3499 // kmp_cmplrdata_t data2; 3500 // For taskloops additional fields: 3501 // kmp_uint64 lb; 3502 // kmp_uint64 ub; 3503 // kmp_int64 st; 3504 // kmp_int32 liter; 3505 // void * reductions; 3506 // }; 3507 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3508 
UD->startDefinition(); 3509 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3510 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3511 UD->completeDefinition(); 3512 QualType KmpCmplrdataTy = C.getRecordType(UD); 3513 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3514 RD->startDefinition(); 3515 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3516 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3517 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3518 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3519 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3520 if (isOpenMPTaskLoopDirective(Kind)) { 3521 QualType KmpUInt64Ty = 3522 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3523 QualType KmpInt64Ty = 3524 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3525 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3526 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3527 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3528 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3529 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3530 } 3531 RD->completeDefinition(); 3532 return RD; 3533 } 3534 3535 static RecordDecl * 3536 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3537 ArrayRef<PrivateDataTy> Privates) { 3538 ASTContext &C = CGM.getContext(); 3539 // Build struct kmp_task_t_with_privates { 3540 // kmp_task_t task_data; 3541 // .kmp_privates_t. privates; 3542 // }; 3543 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3544 RD->startDefinition(); 3545 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3546 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3547 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3548 RD->completeDefinition(); 3549 return RD; 3550 } 3551 3552 /// Emit a proxy function which accepts kmp_task_t as the second 3553 /// argument. 
3554 /// \code 3555 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3556 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3557 /// For taskloops: 3558 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3559 /// tt->reductions, tt->shareds); 3560 /// return 0; 3561 /// } 3562 /// \endcode 3563 static llvm::Function * 3564 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3565 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3566 QualType KmpTaskTWithPrivatesPtrQTy, 3567 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3568 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3569 llvm::Value *TaskPrivatesMap) { 3570 ASTContext &C = CGM.getContext(); 3571 FunctionArgList Args; 3572 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3573 ImplicitParamDecl::Other); 3574 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3575 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3576 ImplicitParamDecl::Other); 3577 Args.push_back(&GtidArg); 3578 Args.push_back(&TaskTypeArg); 3579 const auto &TaskEntryFnInfo = 3580 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3581 llvm::FunctionType *TaskEntryTy = 3582 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3583 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3584 auto *TaskEntry = llvm::Function::Create( 3585 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3586 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3587 TaskEntry->setDoesNotRecurse(); 3588 CodeGenFunction CGF(CGM); 3589 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3590 Loc, Loc); 3591 3592 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3593 // tt, 3594 // For taskloops: 3595 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3596 // 
tt->task_data.shareds); 3597 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3598 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3599 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3600 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3601 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3602 const auto *KmpTaskTWithPrivatesQTyRD = 3603 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3604 LValue Base = 3605 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3606 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3607 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3608 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3609 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3610 3611 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3612 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3613 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3614 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3615 CGF.ConvertTypeForMem(SharedsPtrTy)); 3616 3617 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3618 llvm::Value *PrivatesParam; 3619 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3620 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3621 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3622 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3623 } else { 3624 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3625 } 3626 3627 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3628 TaskPrivatesMap, 3629 CGF.Builder 3630 .CreatePointerBitCastOrAddrSpaceCast( 3631 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3632 .getPointer()}; 3633 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3634 std::end(CommonArgs)); 3635 if (isOpenMPTaskLoopDirective(Kind)) { 3636 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3637 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3638 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3639 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3640 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3641 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3642 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3643 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3644 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3645 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3646 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3647 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3648 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3649 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3650 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3651 CallArgs.push_back(LBParam); 3652 CallArgs.push_back(UBParam); 3653 CallArgs.push_back(StParam); 3654 CallArgs.push_back(LIParam); 3655 CallArgs.push_back(RParam); 3656 } 3657 CallArgs.push_back(SharedsParam); 3658 3659 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3660 CallArgs); 3661 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3662 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3663 CGF.FinishFunction(); 3664 return TaskEntry; 3665 } 3666 3667 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3668 SourceLocation Loc, 3669 QualType KmpInt32Ty, 3670 QualType KmpTaskTWithPrivatesPtrQTy, 3671 QualType KmpTaskTWithPrivatesQTy) { 3672 ASTContext &C = CGM.getContext(); 3673 FunctionArgList Args; 3674 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3675 ImplicitParamDecl::Other); 3676 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3677 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3678 
ImplicitParamDecl::Other); 3679 Args.push_back(&GtidArg); 3680 Args.push_back(&TaskTypeArg); 3681 const auto &DestructorFnInfo = 3682 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3683 llvm::FunctionType *DestructorFnTy = 3684 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3685 std::string Name = 3686 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3687 auto *DestructorFn = 3688 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3689 Name, &CGM.getModule()); 3690 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3691 DestructorFnInfo); 3692 DestructorFn->setDoesNotRecurse(); 3693 CodeGenFunction CGF(CGM); 3694 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3695 Args, Loc, Loc); 3696 3697 LValue Base = CGF.EmitLoadOfPointerLValue( 3698 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3699 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3700 const auto *KmpTaskTWithPrivatesQTyRD = 3701 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3702 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3703 Base = CGF.EmitLValueForField(Base, *FI); 3704 for (const auto *Field : 3705 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3706 if (QualType::DestructionKind DtorKind = 3707 Field->getType().isDestructedType()) { 3708 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3709 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3710 } 3711 } 3712 CGF.FinishFunction(); 3713 return DestructorFn; 3714 } 3715 3716 /// Emit a privates mapping function for correct handling of private and 3717 /// firstprivate variables. 3718 /// \code 3719 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3720 /// **noalias priv1,..., <tyn> **noalias privn) { 3721 /// *priv1 = &.privates.priv1; 3722 /// ...; 3723 /// *privn = &.privates.privn; 3724 /// } 3725 /// \endcode 3726 static llvm::Value * 3727 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3728 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3729 ArrayRef<PrivateDataTy> Privates) { 3730 ASTContext &C = CGM.getContext(); 3731 FunctionArgList Args; 3732 ImplicitParamDecl TaskPrivatesArg( 3733 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3734 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3735 ImplicitParamDecl::Other); 3736 Args.push_back(&TaskPrivatesArg); 3737 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3738 unsigned Counter = 1; 3739 for (const Expr *E : Data.PrivateVars) { 3740 Args.push_back(ImplicitParamDecl::Create( 3741 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3742 C.getPointerType(C.getPointerType(E->getType())) 3743 .withConst() 3744 .withRestrict(), 3745 ImplicitParamDecl::Other)); 3746 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3747 PrivateVarsPos[VD] = Counter; 3748 ++Counter; 3749 } 3750 for (const Expr *E : Data.FirstprivateVars) { 3751 Args.push_back(ImplicitParamDecl::Create( 3752 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3753 C.getPointerType(C.getPointerType(E->getType())) 3754 .withConst() 3755 .withRestrict(), 3756 ImplicitParamDecl::Other)); 3757 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3758 PrivateVarsPos[VD] = Counter; 3759 ++Counter; 3760 } 3761 for (const Expr *E : Data.LastprivateVars) { 3762 Args.push_back(ImplicitParamDecl::Create( 3763 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3764 C.getPointerType(C.getPointerType(E->getType())) 3765 .withConst() 3766 .withRestrict(), 3767 ImplicitParamDecl::Other)); 3768 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3769 PrivateVarsPos[VD] = Counter; 3770 ++Counter; 3771 } 3772 for (const VarDecl *VD : 
Data.PrivateLocals) { 3773 QualType Ty = VD->getType().getNonReferenceType(); 3774 if (VD->getType()->isLValueReferenceType()) 3775 Ty = C.getPointerType(Ty); 3776 if (isAllocatableDecl(VD)) 3777 Ty = C.getPointerType(Ty); 3778 Args.push_back(ImplicitParamDecl::Create( 3779 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3780 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3781 ImplicitParamDecl::Other)); 3782 PrivateVarsPos[VD] = Counter; 3783 ++Counter; 3784 } 3785 const auto &TaskPrivatesMapFnInfo = 3786 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3787 llvm::FunctionType *TaskPrivatesMapTy = 3788 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3789 std::string Name = 3790 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3791 auto *TaskPrivatesMap = llvm::Function::Create( 3792 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3793 &CGM.getModule()); 3794 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3795 TaskPrivatesMapFnInfo); 3796 if (CGM.getLangOpts().Optimize) { 3797 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3798 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3799 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3800 } 3801 CodeGenFunction CGF(CGM); 3802 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3803 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3804 3805 // *privi = &.privates.privi; 3806 LValue Base = CGF.EmitLoadOfPointerLValue( 3807 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3808 TaskPrivatesArg.getType()->castAs<PointerType>()); 3809 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3810 Counter = 0; 3811 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3812 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3813 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3814 LValue RefLVal = 3815 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3816 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3817 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3818 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3819 ++Counter; 3820 } 3821 CGF.FinishFunction(); 3822 return TaskPrivatesMap; 3823 } 3824 3825 /// Emit initialization for private variables in task-based directives. 3826 static void emitPrivatesInit(CodeGenFunction &CGF, 3827 const OMPExecutableDirective &D, 3828 Address KmpTaskSharedsPtr, LValue TDBase, 3829 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3830 QualType SharedsTy, QualType SharedsPtrTy, 3831 const OMPTaskDataTy &Data, 3832 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3833 ASTContext &C = CGF.getContext(); 3834 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3835 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3836 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3837 ? OMPD_taskloop 3838 : OMPD_task; 3839 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3840 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3841 LValue SrcBase; 3842 bool IsTargetTask = 3843 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3844 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3845 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3846 // PointersArray, SizesArray, and MappersArray. The original variables for 3847 // these arrays are not captured and we get their addresses explicitly. 3848 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3849 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3850 SrcBase = CGF.MakeAddrLValue( 3851 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3852 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3853 SharedsTy); 3854 } 3855 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3856 for (const PrivateDataTy &Pair : Privates) { 3857 // Do not initialize private locals. 
    if (Pair.second.isLocalPrivate()) {
      // Still step the field iterator so it stays paired with Privates.
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the dup function every initializer is emitted; for the dup
    // function only non-trivial constructor initializers are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // An element-init variable means the private copy is initialized from
      // the original (shared) variable, whose lvalue is located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds record, then rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda captures and block bodies: emit the original reference
          // expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map Elem to the address of the original, then emit
          // the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init variable: the initializer does not refer to the
        // original, so emit it directly into the private field.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Move on to the field that belongs to the next entry of Privates.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
3948 static bool checkInitIsRequired(CodeGenFunction &CGF, 3949 ArrayRef<PrivateDataTy> Privates) { 3950 bool InitRequired = false; 3951 for (const PrivateDataTy &Pair : Privates) { 3952 if (Pair.second.isLocalPrivate()) 3953 continue; 3954 const VarDecl *VD = Pair.second.PrivateCopy; 3955 const Expr *Init = VD->getAnyInitializer(); 3956 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3957 !CGF.isTrivialInitializer(Init)); 3958 if (InitRequired) 3959 break; 3960 } 3961 return InitRequired; 3962 } 3963 3964 3965 /// Emit task_dup function (for initialization of 3966 /// private/firstprivate/lastprivate vars and last_iter flag) 3967 /// \code 3968 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3969 /// lastpriv) { 3970 /// // setup lastprivate flag 3971 /// task_dst->last = lastpriv; 3972 /// // could be constructor calls here... 3973 /// } 3974 /// \endcode 3975 static llvm::Value * 3976 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3977 const OMPExecutableDirective &D, 3978 QualType KmpTaskTWithPrivatesPtrQTy, 3979 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3980 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3981 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3982 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3983 ASTContext &C = CGM.getContext(); 3984 FunctionArgList Args; 3985 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3986 KmpTaskTWithPrivatesPtrQTy, 3987 ImplicitParamDecl::Other); 3988 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3989 KmpTaskTWithPrivatesPtrQTy, 3990 ImplicitParamDecl::Other); 3991 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3992 ImplicitParamDecl::Other); 3993 Args.push_back(&DstArg); 3994 Args.push_back(&SrcArg); 3995 Args.push_back(&LastprivArg); 3996 const auto &TaskDupFnInfo = 3997 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3998 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3999 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4000 auto *TaskDup = llvm::Function::Create( 4001 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4002 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4003 TaskDup->setDoesNotRecurse(); 4004 CodeGenFunction CGF(CGM); 4005 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4006 Loc); 4007 4008 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4009 CGF.GetAddrOfLocalVar(&DstArg), 4010 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4011 // task_dst->liter = lastpriv; 4012 if (WithLastIter) { 4013 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4014 LValue Base = CGF.EmitLValueForField( 4015 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4016 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4017 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4018 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4019 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4020 } 4021 4022 // Emit initial values for private copies (if any). 
4023 assert(!Privates.empty()); 4024 Address KmpTaskSharedsPtr = Address::invalid(); 4025 if (!Data.FirstprivateVars.empty()) { 4026 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4027 CGF.GetAddrOfLocalVar(&SrcArg), 4028 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4029 LValue Base = CGF.EmitLValueForField( 4030 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4031 KmpTaskSharedsPtr = Address( 4032 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4033 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4034 KmpTaskTShareds)), 4035 Loc), 4036 CGM.getNaturalTypeAlignment(SharedsTy)); 4037 } 4038 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4039 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4040 CGF.FinishFunction(); 4041 return TaskDup; 4042 } 4043 4044 /// Checks if destructor function is required to be generated. 4045 /// \return true if cleanups are required, false otherwise. 4046 static bool 4047 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4048 ArrayRef<PrivateDataTy> Privates) { 4049 for (const PrivateDataTy &P : Privates) { 4050 if (P.second.isLocalPrivate()) 4051 continue; 4052 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4053 if (Ty.isDestructedType()) 4054 return true; 4055 } 4056 return false; 4057 } 4058 4059 namespace { 4060 /// Loop generator for OpenMP iterator expression. 
/// The constructor privatizes every iterator variable and its counter, then
/// opens one loop level per iterator ("iter.cont" / "iter.body" blocks). The
/// destructor closes the levels in reverse order by emitting the counter
/// update, the branch back to "iter.cont" and the "iter.exit" block. With a
/// null iterator expression both constructor and destructor emit nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, indexed like the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds and register the private storage for the
    // iterator variables and their counters before privatizing.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost first (reverse of the constructor's order).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Computes the address and the size of the data designated by \p E.
/// - Array shaping expression: the address is the value of the base
///   expression and the size is the size of the base's pointee type
///   multiplied by every dimension (each converted to the size type).
/// - Array section: the address is the lvalue pointer of \p E and the size is
///   the distance from it to one element past the section's upper element.
/// - Anything else: the lvalue pointer of \p E and the size of its type.
/// \return The pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    // Step one element past the upper element so the subtraction below
    // yields the full extent.
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the implicit record kmp_task_affinity_info_t, if it is not built
/// yet (i.e. \p KmpTaskAffinityInfoTy is null), and stores its type in
/// \p KmpTaskAffinityInfoTy. The record has three fields, in this order: an
/// intptr-typed value, a size_t-typed value and a 32-bit unsigned value.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

// Emits the allocation and initialization of a task descriptor for directive
// D. In order, this function:
//  1. collects private/firstprivate/lastprivate/local privates and sorts them
//     by decreasing alignment;
//  2. builds (or reuses) the kmp_task_t type and the per-task record that
//     wraps it together with the privates;
//  3. emits the privates mapping function and the proxy task entry;
//  4. calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc when D has
//     a 'nowait' clause;
//  5. handles 'detach' and 'affinity' clauses;
//  6. copies the shareds, initializes the privates, optionally emits the
//     task_dup function (taskloop only), and stores the destructors function
//     and the priority into the task descriptor.
// The returned TaskResultTy carries NewTask, TaskEntry, NewTaskNewTaskTTy,
// TDBase, KmpTaskTQTyRD and, when emitted, TaskDupFn.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the declaration's own alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the sort is stable, so entries with equal
  // alignment keep the insertion order established above.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the other directives each cache their own
  // kmp_task_t type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates map is cast to the type of the task function's fourth
  // parameter (argument index 3).
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the wrapper record supplies the privates type.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime value and is merged in separately below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select FinalFlag or 0 on it at runtime;
  // otherwise use its constant integer part.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target variant of the allocation routine is
  // called; it takes the device id as one extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is the emitted location value for the clause and
    // shadows the SourceLocation parameter inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    // Note: inside the clause loops below, 'C' names the clause and shadows
    // this ASTContext reference.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier;
    // NumOfElements stays null unless some clause has an iterator modifier.
    // NOTE(review): with iterator modifiers NumOfElements is the product of
    // the iterators' upper bounds accumulated across all such clauses and is
    // not scaled by varlist_size() - confirm it matches the number of
    // entries written by the fill loop further down.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a variable-length array whose length is the
      // constant count plus the iterator-dependent count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // 'Pos' is the compile-time index of the next free array slot.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For iterator clauses the slot index is only known at run time, so it is
    // kept in a temporary that starts at the number of slots filled so far.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object emits the surrounding iterator loops; everything
      // emitted until it is destroyed lands in the loop body.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime slot index.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the wrapper record; its first field is the
  // kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task_dup function is emitted only for taskloop directives, and only
    // when there are lastprivates or a private needs non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  // The union declaration is taken from the Data1 field and reused below for
  // the Data2 field.
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // The remaining kinds have no runtime dependence kind here and are not
  // expected to reach this function.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \p FlagsTy is always (re)computed: an unsigned integer type as wide as
/// 'bool'. The record has three fields, in this order: an intptr-typed value,
/// a size_t-typed value and a value of the flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

// Loads the pointer stored in the depobj variable \p DepobjLVal, views it as
// a pointer to kmp_depend_info records, and reads the element count from the
// base_addr field of the record located one element before the pointed-to
// one. Returns the pair (number of dependencies, lvalue of the first record).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Element at index -1 holds the number of dependencies.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills one kmp_depend_info record in \p DependenciesArray for every
/// expression in \p Data.DepExprs (base address, length, dependence flags).
/// \p Pos is either a pointer to a compile-time index, which is incremented
/// in place, or a pointer to an lvalue holding a runtime index, which is
/// loaded, used and incremented in the emitted code. If \p Data has an
/// iterator expression, the stores are emitted inside the iterator loops.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Emits nothing when there is no iterator expression.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Locate the record to fill: constant index or runtime index.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position, in the compiler or in the emitted code.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For a depobj dependence \p Data, emits code that accumulates, per depobj
/// expression, the number of dependencies stored in the depobj into a
/// zero-initialized temporary ("depobj.size.addr"). The accumulation is
/// emitted inside the iterator loops when \p Data has an iterator expression;
/// the temporaries are loaded after the loops are closed.
/// \return One loaded size value per expression in \p Data.DepExprs.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The nested scope makes the iterator loops end before the sizes are
    // loaded below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Element at index -1 holds the number of dependencies.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // size += NumDeps, with 'size' living in a zero-initialized temporary.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// For a depobj dependence \p Data, emits code that copies the
/// kmp_depend_info records of every depobj expression into
/// \p DependenciesArray starting at the runtime index held in \p PosLVal, and
/// advances that index by the number of records copied. The copies are
/// emitted inside the iterator loops when \p Data has an iterator expression.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte count = record size * number of records in this depobj.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      // (the index advances by the number of records, not by the byte count)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4852 }); 4853 QualType FlagsTy; 4854 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4855 bool HasDepobjDeps = false; 4856 bool HasRegularWithIterators = false; 4857 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4858 llvm::Value *NumOfRegularWithIterators = 4859 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4860 // Calculate number of depobj dependecies and regular deps with the iterators. 4861 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4862 if (D.DepKind == OMPC_DEPEND_depobj) { 4863 SmallVector<llvm::Value *, 4> Sizes = 4864 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4865 for (llvm::Value *Size : Sizes) { 4866 NumOfDepobjElements = 4867 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4868 } 4869 HasDepobjDeps = true; 4870 continue; 4871 } 4872 // Include number of iterations, if any. 4873 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4874 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4875 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4876 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4877 NumOfRegularWithIterators = 4878 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4879 } 4880 HasRegularWithIterators = true; 4881 continue; 4882 } 4883 } 4884 4885 QualType KmpDependInfoArrayTy; 4886 if (HasDepobjDeps || HasRegularWithIterators) { 4887 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4888 /*isSigned=*/false); 4889 if (HasDepobjDeps) { 4890 NumOfElements = 4891 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4892 } 4893 if (HasRegularWithIterators) { 4894 NumOfElements = 4895 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4896 } 4897 OpaqueValueExpr OVE(Loc, 4898 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4899 VK_RValue); 4900 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4901 RValue::get(NumOfElements)); 4902 
KmpDependInfoArrayTy = 4903 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4904 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4905 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4906 // Properly emit variable-sized array. 4907 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4908 ImplicitParamDecl::Other); 4909 CGF.EmitVarDecl(*PD); 4910 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4911 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4912 /*isSigned=*/false); 4913 } else { 4914 KmpDependInfoArrayTy = C.getConstantArrayType( 4915 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4916 ArrayType::Normal, /*IndexTypeQuals=*/0); 4917 DependenciesArray = 4918 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4919 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4920 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4921 /*isSigned=*/false); 4922 } 4923 unsigned Pos = 0; 4924 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4925 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4926 Dependencies[I].IteratorExpr) 4927 continue; 4928 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4929 DependenciesArray); 4930 } 4931 // Copy regular dependecies with iterators. 4932 LValue PosLVal = CGF.MakeAddrLValue( 4933 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4934 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4935 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4936 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4937 !Dependencies[I].IteratorExpr) 4938 continue; 4939 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4940 DependenciesArray); 4941 } 4942 // Copy final depobj arrays without iterators. 
4943 if (HasDepobjDeps) { 4944 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4945 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4946 continue; 4947 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4948 DependenciesArray); 4949 } 4950 } 4951 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4952 DependenciesArray, CGF.VoidPtrTy); 4953 return std::make_pair(NumOfElements, DependenciesArray); 4954 } 4955 4956 Address CGOpenMPRuntime::emitDepobjDependClause( 4957 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4958 SourceLocation Loc) { 4959 if (Dependencies.DepExprs.empty()) 4960 return Address::invalid(); 4961 // Process list of dependencies. 4962 ASTContext &C = CGM.getContext(); 4963 Address DependenciesArray = Address::invalid(); 4964 unsigned NumDependencies = Dependencies.DepExprs.size(); 4965 QualType FlagsTy; 4966 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4967 RecordDecl *KmpDependInfoRD = 4968 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4969 4970 llvm::Value *Size; 4971 // Define type kmp_depend_info[<Dependencies.size()>]; 4972 // For depobj reserve one extra element to store the number of elements. 4973 // It is required to handle depobj(x) update(in) construct. 
4974 // kmp_depend_info[<Dependencies.size()>] deps; 4975 llvm::Value *NumDepsVal; 4976 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4977 if (const auto *IE = 4978 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4979 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4980 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4981 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4982 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4983 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4984 } 4985 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4986 NumDepsVal); 4987 CharUnits SizeInBytes = 4988 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4989 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4990 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4991 NumDepsVal = 4992 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4993 } else { 4994 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4995 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4996 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4997 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4998 Size = CGM.getSize(Sz.alignTo(Align)); 4999 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5000 } 5001 // Need to allocate on the dynamic memory. 5002 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5003 // Use default allocator. 
5004 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5005 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5006 5007 llvm::Value *Addr = 5008 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5009 CGM.getModule(), OMPRTL___kmpc_alloc), 5010 Args, ".dep.arr.addr"); 5011 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5012 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5013 DependenciesArray = Address(Addr, Align); 5014 // Write number of elements in the first element of array for depobj. 5015 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5016 // deps[i].base_addr = NumDependencies; 5017 LValue BaseAddrLVal = CGF.EmitLValueForField( 5018 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5019 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5020 llvm::PointerUnion<unsigned *, LValue *> Pos; 5021 unsigned Idx = 1; 5022 LValue PosLVal; 5023 if (Dependencies.IteratorExpr) { 5024 PosLVal = CGF.MakeAddrLValue( 5025 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5026 C.getSizeType()); 5027 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5028 /*IsInit=*/true); 5029 Pos = &PosLVal; 5030 } else { 5031 Pos = &Idx; 5032 } 5033 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5034 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5035 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5036 return DependenciesArray; 5037 } 5038 5039 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5040 SourceLocation Loc) { 5041 ASTContext &C = CGM.getContext(); 5042 QualType FlagsTy; 5043 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5044 LValue Base = CGF.EmitLoadOfPointerLValue( 5045 DepobjLVal.getAddress(CGF), 5046 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5047 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5048 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5049 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5050 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5051 Addr.getPointer(), 5052 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5053 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5054 CGF.VoidPtrTy); 5055 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5056 // Use default allocator. 5057 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5058 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5059 5060 // _kmpc_free(gtid, addr, nullptr); 5061 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5062 CGM.getModule(), OMPRTL___kmpc_free), 5063 Args); 5064 } 5065 5066 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5067 OpenMPDependClauseKind NewDepKind, 5068 SourceLocation Loc) { 5069 ASTContext &C = CGM.getContext(); 5070 QualType FlagsTy; 5071 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5072 RecordDecl *KmpDependInfoRD = 5073 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5074 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5075 llvm::Value *NumDeps; 5076 LValue Base; 5077 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5078 5079 Address Begin = Base.getAddress(CGF); 5080 // Cast from pointer to array type to pointer to single element. 5081 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5082 // The basic structure here is a while-do loop. 
5083 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5084 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5085 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5086 CGF.EmitBlock(BodyBB); 5087 llvm::PHINode *ElementPHI = 5088 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5089 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5090 Begin = Address(ElementPHI, Begin.getAlignment()); 5091 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5092 Base.getTBAAInfo()); 5093 // deps[i].flags = NewDepKind; 5094 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5095 LValue FlagsLVal = CGF.EmitLValueForField( 5096 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5097 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5098 FlagsLVal); 5099 5100 // Shift the address forward by one element. 5101 Address ElementNext = 5102 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5103 ElementPHI->addIncoming(ElementNext.getPointer(), 5104 CGF.Builder.GetInsertBlock()); 5105 llvm::Value *IsEmpty = 5106 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5107 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5108 // Done. 
5109 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5110 } 5111 5112 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5113 const OMPExecutableDirective &D, 5114 llvm::Function *TaskFunction, 5115 QualType SharedsTy, Address Shareds, 5116 const Expr *IfCond, 5117 const OMPTaskDataTy &Data) { 5118 if (!CGF.HaveInsertPoint()) 5119 return; 5120 5121 TaskResultTy Result = 5122 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5123 llvm::Value *NewTask = Result.NewTask; 5124 llvm::Function *TaskEntry = Result.TaskEntry; 5125 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5126 LValue TDBase = Result.TDBase; 5127 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5128 // Process list of dependences. 5129 Address DependenciesArray = Address::invalid(); 5130 llvm::Value *NumOfElements; 5131 std::tie(NumOfElements, DependenciesArray) = 5132 emitDependClause(CGF, Data.Dependences, Loc); 5133 5134 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5135 // libcall. 
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Arguments shared by the calls that take (loc, gtid, new_task).
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Arguments for __kmpc_omp_task_with_deps. The array is filled in only when
  // there are dependences and is read only under the same condition.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No 'noalias' dependences are passed: count 0 and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code generator that hands the task to the runtime. TaskArgs and
  // DepTaskArgs are captured by reference, so it must not be invoked after
  // this function has returned.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For an untied task store 0 into the part_id field of the task
      // descriptor before the task is submitted.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    // Pick the runtime entry point depending on whether dependences exist.
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for the dependence-wait runtime call; same layout as
  // DepTaskArgs but without the task pointer. Filled in only when there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4]
= CGF.Builder.getInt32(0); 5183 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5184 } 5185 auto &M = CGM.getModule(); 5186 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5187 TaskEntry, &Data, &DepWaitTaskArgs, 5188 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5189 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5190 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5191 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5192 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5193 // is specified. 5194 if (!Data.Dependences.empty()) 5195 CGF.EmitRuntimeCall( 5196 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5197 DepWaitTaskArgs); 5198 // Call proxy_task_entry(gtid, new_task); 5199 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5200 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5201 Action.Enter(CGF); 5202 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5203 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5204 OutlinedFnArgs); 5205 }; 5206 5207 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5208 // kmp_task_t *new_task); 5209 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5210 // kmp_task_t *new_task); 5211 RegionCodeGenTy RCG(CodeGen); 5212 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5213 M, OMPRTL___kmpc_omp_task_begin_if0), 5214 TaskArgs, 5215 OMPBuilder.getOrCreateRuntimeFunction( 5216 M, OMPRTL___kmpc_omp_task_complete_if0), 5217 TaskArgs); 5218 RCG.setAction(Action); 5219 RCG(CGF); 5220 }; 5221 5222 if (IfCond) { 5223 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5224 } else { 5225 RegionCodeGenTy ThenRCG(ThenCodeGen); 5226 ThenRCG(CGF); 5227 } 5228 } 5229 5230 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5231 const OMPLoopDirective &D, 5232 llvm::Function *TaskFunction, 5233 
QualType SharedsTy, Address Shareds, 5234 const Expr *IfCond, 5235 const OMPTaskDataTy &Data) { 5236 if (!CGF.HaveInsertPoint()) 5237 return; 5238 TaskResultTy Result = 5239 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5240 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5241 // libcall. 5242 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5243 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5244 // sched, kmp_uint64 grainsize, void *task_dup); 5245 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5246 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5247 llvm::Value *IfVal; 5248 if (IfCond) { 5249 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5250 /*isSigned=*/true); 5251 } else { 5252 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5253 } 5254 5255 LValue LBLVal = CGF.EmitLValueForField( 5256 Result.TDBase, 5257 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5258 const auto *LBVar = 5259 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5260 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5261 LBLVal.getQuals(), 5262 /*IsInitializer=*/true); 5263 LValue UBLVal = CGF.EmitLValueForField( 5264 Result.TDBase, 5265 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5266 const auto *UBVar = 5267 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5268 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5269 UBLVal.getQuals(), 5270 /*IsInitializer=*/true); 5271 LValue StLVal = CGF.EmitLValueForField( 5272 Result.TDBase, 5273 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5274 const auto *StVar = 5275 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5276 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5277 StLVal.getQuals(), 5278 /*IsInitializer=*/true); 5279 // 
Store reductions address. 5280 LValue RedLVal = CGF.EmitLValueForField( 5281 Result.TDBase, 5282 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5283 if (Data.Reductions) { 5284 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5285 } else { 5286 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5287 CGF.getContext().VoidPtrTy); 5288 } 5289 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5290 llvm::Value *TaskArgs[] = { 5291 UpLoc, 5292 ThreadID, 5293 Result.NewTask, 5294 IfVal, 5295 LBLVal.getPointer(CGF), 5296 UBLVal.getPointer(CGF), 5297 CGF.EmitLoadOfScalar(StLVal, Loc), 5298 llvm::ConstantInt::getSigned( 5299 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5300 llvm::ConstantInt::getSigned( 5301 CGF.IntTy, Data.Schedule.getPointer() 5302 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5303 : NoSchedule), 5304 Data.Schedule.getPointer() 5305 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5306 /*isSigned=*/false) 5307 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5308 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5309 Result.TaskDupFn, CGF.VoidPtrTy) 5310 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5311 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5312 CGM.getModule(), OMPRTL___kmpc_taskloop), 5313 TaskArgs); 5314 } 5315 5316 /// Emit reduction operation for each element of array (required for 5317 /// array sections) LHS op = RHS. 5318 /// \param Type Type of array. 5319 /// \param LHSVar Variable on the left side of the reduction operation 5320 /// (references element of array in original variable). 5321 /// \param RHSVar Variable on the right side of the reduction operation 5322 /// (references element of array in original variable). 5323 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5324 /// RHSVar. 
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction: look up the storage currently
  // associated with both variables.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  // NOTE(review): ElementTy is passed in default-constructed and read further
  // down, so emitArrayLength presumably fills it in -- confirm against its
  // declaration.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  // Only the LHS end pointer is computed; it alone drives loop termination.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop: an emptiness test in front of
  // a body whose exit condition is checked at the bottom.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the body entirely when begin == end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
5353 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5354 CGF.EmitBlock(BodyBB); 5355 5356 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5357 5358 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5359 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5360 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5361 Address RHSElementCurrent = 5362 Address(RHSElementPHI, 5363 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5364 5365 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5366 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5367 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5368 Address LHSElementCurrent = 5369 Address(LHSElementPHI, 5370 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5371 5372 // Emit copy. 5373 CodeGenFunction::OMPPrivateScope Scope(CGF); 5374 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5375 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5376 Scope.Privatize(); 5377 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5378 Scope.ForceCleanup(); 5379 5380 // Shift the address forward by one element. 5381 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5382 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5383 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5384 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5385 // Check whether we've reached the end. 5386 llvm::Value *Done = 5387 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5388 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5389 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5390 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5391 5392 // Done. 5393 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5394 } 5395 5396 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5397 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5398 /// UDR combiner function. 5399 static void emitReductionCombiner(CodeGenFunction &CGF, 5400 const Expr *ReductionOp) { 5401 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5402 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5403 if (const auto *DRE = 5404 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5405 if (const auto *DRD = 5406 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5407 std::pair<llvm::Function *, llvm::Function *> Reduction = 5408 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5409 RValue Func = RValue::get(Reduction.first); 5410 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5411 CGF.EmitIgnoredExpr(ReductionOp); 5412 return; 5413 } 5414 CGF.EmitIgnoredExpr(ReductionOp); 5415 } 5416 5417 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5418 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5419 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5420 ArrayRef<const Expr *> ReductionOps) { 5421 ASTContext &C = CGM.getContext(); 5422 5423 // void reduction_func(void *LHSArg, void *RHSArg); 5424 FunctionArgList Args; 5425 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5426 ImplicitParamDecl::Other); 5427 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5428 ImplicitParamDecl::Other); 5429 Args.push_back(&LHSArg); 5430 Args.push_back(&RHSArg); 5431 const auto &CGFI = 5432 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5433 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5434 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5435 llvm::GlobalValue::InternalLinkage, Name, 5436 &CGM.getModule()); 5437 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5438 Fn->setDoesNotRecurse(); 
// Start emitting the body of the freshly created function with its own
  // CodeGenFunction instance.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  // Both void* parameters are loaded and reinterpreted as ArgsType.
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // I counts reduction operations; Idx is the slot in the argument arrays. The
  // two are tracked separately because the variably-modified branch below
  // advances Idx by an additional slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Map the RHS helper variable to slot Idx of the RHS array. Idx is
    // captured by value so the callback keeps the slot index it was
    // registered with.
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    // Likewise for the LHS helper variable and the LHS array.
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
5471 ++Idx; 5472 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5473 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5474 const VariableArrayType *VLA = 5475 CGF.getContext().getAsVariableArrayType(PrivTy); 5476 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5477 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5478 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5479 CGF.EmitVariablyModifiedType(PrivTy); 5480 } 5481 } 5482 Scope.Privatize(); 5483 IPriv = Privates.begin(); 5484 auto ILHS = LHSExprs.begin(); 5485 auto IRHS = RHSExprs.begin(); 5486 for (const Expr *E : ReductionOps) { 5487 if ((*IPriv)->getType()->isArrayType()) { 5488 // Emit reduction for array section. 5489 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5490 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5491 EmitOMPAggregateReduction( 5492 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5493 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5494 emitReductionCombiner(CGF, E); 5495 }); 5496 } else { 5497 // Emit reduction for array subscript or single variable. 5498 emitReductionCombiner(CGF, E); 5499 } 5500 ++IPriv; 5501 ++ILHS; 5502 ++IRHS; 5503 } 5504 Scope.ForceCleanup(); 5505 CGF.FinishFunction(); 5506 return Fn; 5507 } 5508 5509 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5510 const Expr *ReductionOp, 5511 const Expr *PrivateRef, 5512 const DeclRefExpr *LHS, 5513 const DeclRefExpr *RHS) { 5514 if (PrivateRef->getType()->isArrayType()) { 5515 // Emit reduction for array section. 
5516 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5517 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5518 EmitOMPAggregateReduction( 5519 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5520 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5521 emitReductionCombiner(CGF, ReductionOp); 5522 }); 5523 } else { 5524 // Emit reduction for array subscript or single variable. 5525 emitReductionCombiner(CGF, ReductionOp); 5526 } 5527 } 5528 5529 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5530 ArrayRef<const Expr *> Privates, 5531 ArrayRef<const Expr *> LHSExprs, 5532 ArrayRef<const Expr *> RHSExprs, 5533 ArrayRef<const Expr *> ReductionOps, 5534 ReductionOptionsTy Options) { 5535 if (!CGF.HaveInsertPoint()) 5536 return; 5537 5538 bool WithNowait = Options.WithNowait; 5539 bool SimpleReduction = Options.SimpleReduction; 5540 5541 // Next code should be emitted for reduction: 5542 // 5543 // static kmp_critical_name lock = { 0 }; 5544 // 5545 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5546 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5547 // ... 5548 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5549 // *(Type<n>-1*)rhs[<n>-1]); 5550 // } 5551 // 5552 // ... 5553 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5554 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5555 // RedList, reduce_func, &<lock>)) { 5556 // case 1: 5557 // ... 5558 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5559 // ... 5560 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5561 // break; 5562 // case 2: 5563 // ... 5564 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5565 // ... 5566 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5567 // break; 5568 // default:; 5569 // } 5570 // 5571 // if SimpleReduction is true, only the next code is generated: 5572 // ... 
//  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  // Simple reduction: emit only the per-item combiners, inside a cleanup
  // scope, and skip all runtime calls.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one pointer slot per reduction item plus one extra slot for
  // every private whose type is variably modified (that slot carries the
  // array size, see below).
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever a size slot is
  // inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local shadows the outer `Size`. It is the VLA element
      // count, stored in the list as an integer converted to a pointer.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // <n> is the number of reduction items (RHSExprs.size()), whereas
  // sizeof(RedList) is taken from ReductionArrayTy and therefore also covers
  // the extra VLA size slots.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Case 1 body: the same per-item combiners as the simple-reduction path.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter callee (nullptr / llvm::None); the end-reduce runtime function
  // with EndArgs is supplied as the second callee of the action.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are set only when the reduction op is `x = <update>`;
      // EExpr and BO are set only when the update (or, for a conditional
      // operator, its condition) is a binary operator.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is only the initial value; it is replaced below when a
      // binary operator is found in the update expression.
      BinaryOperatorKind BO = BO_Comma;
      // Note: the `BO` declared in this condition is the BinaryOperator node
      // and shadows the opcode variable above within the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback below evaluates UpExpr with VD privatized to a
          // temporary that holds the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // Reached when the reduction op is not a plain assignment; the
        // combiner is wrapped in a critical region named "atomic_reduction".
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the blocking form attaches __kmpc_end_reduce to case 2; with nowait
  // the atomic code is emitted without any action.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
/// Local variables and parameters contribute their plain name instead of the
/// mangled one. The declaration is taken from getBaseDecl(Ref) when that
/// yields one, otherwise from \p Ref itself, which must then be a DeclRefExpr
/// to a VarDecl.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  // Always name the canonical declaration so every caller passing the same
  // variable gets the same string.
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction instance.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  // The variable is looked up under the name
  // generateUniqueName(CGM, "reduction_size", <ref expr of item N>).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer: pass a null original address.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \param LHS,RHS DeclRefExprs naming the variables that are remapped onto the
/// first and second function argument respectively.
/// \return The newly created internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item inside \p RCG.
/// \return The newly created function, or nullptr when item \p N needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Nothing to destroy: no finalizer is generated at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

// Fills a temporary array of kmp_taskred_input_t descriptors (one per entry
// of Data.ReductionVars) and passes it to __kmpc_taskred_modifier_init or
// __kmpc_taskred_init. Returns the value produced by that runtime call, or
// nullptr when there is no insert point or no reduction variable.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // Field order must follow the struct layout sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // NOTE(review): LHSExprs, RHSExprs, Data.ReductionOps and
  // Data.ReductionCopies are all indexed by Cnt below, so they are assumed to
  // have at least Data.ReductionVars.size() elements - confirm at call sites.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // (a null pointer is stored when no finalizer function was generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

// Emits the runtime call that closes a task reduction started with the
// task-modifier form of the init call. Note that, unlike the neighbouring
// emitters, this function does not test CGF.HaveInsertPoint() itself.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6204 if (Sizes.second) { 6205 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6206 /*isSigned=*/false); 6207 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6208 CGF, CGM.getContext().getSizeType(), 6209 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6210 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6211 } 6212 } 6213 6214 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6215 SourceLocation Loc, 6216 llvm::Value *ReductionsPtr, 6217 LValue SharedLVal) { 6218 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6219 // *d); 6220 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6221 CGM.IntTy, 6222 /*isSigned=*/true), 6223 ReductionsPtr, 6224 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6225 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6226 return Address( 6227 CGF.EmitRuntimeCall( 6228 OMPBuilder.getOrCreateRuntimeFunction( 6229 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6230 Args), 6231 SharedLVal.getAlignment()); 6232 } 6233 6234 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6235 SourceLocation Loc) { 6236 if (!CGF.HaveInsertPoint()) 6237 return; 6238 6239 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6240 OMPBuilder.createTaskwait(CGF.Builder); 6241 } else { 6242 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6243 // global_tid); 6244 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6245 // Ignore return result until untied tasks are supported. 
6246 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6247 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6248 Args); 6249 } 6250 6251 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6252 Region->emitUntiedSwitch(CGF); 6253 } 6254 6255 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6256 OpenMPDirectiveKind InnerKind, 6257 const RegionCodeGenTy &CodeGen, 6258 bool HasCancel) { 6259 if (!CGF.HaveInsertPoint()) 6260 return; 6261 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6262 InnerKind != OMPD_critical && 6263 InnerKind != OMPD_master && 6264 InnerKind != OMPD_masked); 6265 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6266 } 6267 6268 namespace { 6269 enum RTCancelKind { 6270 CancelNoreq = 0, 6271 CancelParallel = 1, 6272 CancelLoop = 2, 6273 CancelSections = 3, 6274 CancelTaskgroup = 4 6275 }; 6276 } // anonymous namespace 6277 6278 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6279 RTCancelKind CancelKind = CancelNoreq; 6280 if (CancelRegion == OMPD_parallel) 6281 CancelKind = CancelParallel; 6282 else if (CancelRegion == OMPD_for) 6283 CancelKind = CancelLoop; 6284 else if (CancelRegion == OMPD_sections) 6285 CancelKind = CancelSections; 6286 else { 6287 assert(CancelRegion == OMPD_taskgroup); 6288 CancelKind = CancelTaskgroup; 6289 } 6290 return CancelKind; 6291 } 6292 6293 void CGOpenMPRuntime::emitCancellationPointCall( 6294 CodeGenFunction &CGF, SourceLocation Loc, 6295 OpenMPDirectiveKind CancelRegion) { 6296 if (!CGF.HaveInsertPoint()) 6297 return; 6298 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6299 // global_tid, kmp_int32 cncl_kind); 6300 if (auto *OMPRegionInfo = 6301 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6302 // For 'cancellation point taskgroup', the task region info may not have a 6303 // cancel. This may instead happen in another adjacent task. 
if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is not ignored here: a non-zero value branches to the
      // cancel exit block below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // The destination is chosen by the kind of the enclosing region, not by
      // CancelRegion.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits a call to __kmpc_cancel followed by a conditional branch to the
// cancel destination of the enclosing OpenMP region. Nothing is emitted when
// there is no insert point or the current captured statement info is not an
// OpenMP region. With IfCond set, the call is guarded by that condition and
// the else branch is empty.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // `this` is captured for OMPBuilder; the runtime object used for the
    // location and thread id is re-fetched from the CGF passed to the lambda.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The result is not ignored here: a non-zero value branches to the
      // cancel exit block below.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
6375 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6376 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6377 6378 public: 6379 OMPUsesAllocatorsActionTy( 6380 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6381 : Allocators(Allocators) {} 6382 void Enter(CodeGenFunction &CGF) override { 6383 if (!CGF.HaveInsertPoint()) 6384 return; 6385 for (const auto &AllocatorData : Allocators) { 6386 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6387 CGF, AllocatorData.first, AllocatorData.second); 6388 } 6389 } 6390 void Exit(CodeGenFunction &CGF) override { 6391 if (!CGF.HaveInsertPoint()) 6392 return; 6393 for (const auto &AllocatorData : Allocators) { 6394 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6395 AllocatorData.first); 6396 } 6397 } 6398 }; 6399 } // namespace 6400 6401 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6402 const OMPExecutableDirective &D, StringRef ParentName, 6403 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6404 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6405 assert(!ParentName.empty() && "Invalid target region parent name!"); 6406 HasEmittedTargetRegion = true; 6407 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6408 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6409 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6410 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6411 if (!D.AllocatorTraits) 6412 continue; 6413 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6414 } 6415 } 6416 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6417 CodeGen.setAction(UsesAllocatorAction); 6418 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6419 IsOffloadEntry, CodeGen); 6420 } 6421 6422 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6423 const Expr *Allocator, 6424 const Expr *AllocatorTraits) { 6425 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6426 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6427 // Use default memspace handle. 6428 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6429 llvm::Value *NumTraits = llvm::ConstantInt::get( 6430 CGF.IntTy, cast<ConstantArrayType>( 6431 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6432 ->getSize() 6433 .getLimitedValue()); 6434 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6435 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6436 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6437 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6438 AllocatorTraitsLVal.getBaseInfo(), 6439 AllocatorTraitsLVal.getTBAAInfo()); 6440 llvm::Value *Traits = 6441 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6442 6443 llvm::Value *AllocatorVal = 6444 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6445 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6446 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6447 // Store to allocator. 
6448 CGF.EmitVarDecl(*cast<VarDecl>( 6449 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6450 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6451 AllocatorVal = 6452 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6453 Allocator->getType(), Allocator->getExprLoc()); 6454 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6455 } 6456 6457 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6458 const Expr *Allocator) { 6459 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6460 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6461 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6462 llvm::Value *AllocatorVal = 6463 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6464 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6465 CGF.getContext().VoidPtrTy, 6466 Allocator->getExprLoc()); 6467 (void)CGF.EmitRuntimeCall( 6468 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6469 OMPRTL___kmpc_destroy_allocator), 6470 {ThreadId, AllocatorVal}); 6471 } 6472 6473 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6474 const OMPExecutableDirective &D, StringRef ParentName, 6475 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6476 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6477 // Create a unique name for the entry function using the source location 6478 // information of the current target region. The name will be something like: 6479 // 6480 // __omp_offloading_DD_FFFF_PP_lBB 6481 // 6482 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6483 // mangled name of the function that encloses the target region and BB is the 6484 // line number of the target region. 
6485 6486 unsigned DeviceID; 6487 unsigned FileID; 6488 unsigned Line; 6489 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6490 Line); 6491 SmallString<64> EntryFnName; 6492 { 6493 llvm::raw_svector_ostream OS(EntryFnName); 6494 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6495 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6496 } 6497 6498 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6499 6500 CodeGenFunction CGF(CGM, true); 6501 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6502 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6503 6504 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6505 6506 // If this target outline function is not an offload entry, we don't need to 6507 // register it. 6508 if (!IsOffloadEntry) 6509 return; 6510 6511 // The target region ID is used by the runtime library to identify the current 6512 // target region, so it only has to be unique and not necessarily point to 6513 // anything. It could be the pointer to the outlined function that implements 6514 // the target region, but we aren't using that so that the compiler doesn't 6515 // need to keep that, and could therefore inline the host function if proven 6516 // worthwhile during optimization. In the other hand, if emitting code for the 6517 // device, the ID has to be the function address so that it can retrieved from 6518 // the offloading entry and launched by the runtime library. We also mark the 6519 // outlined function to have external linkage in case we are emitting code for 6520 // the device, because these functions will be entry points to the device. 
6521 6522 if (CGM.getLangOpts().OpenMPIsDevice) { 6523 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6524 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6525 OutlinedFn->setDSOLocal(false); 6526 if (CGM.getTriple().isAMDGCN()) 6527 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6528 } else { 6529 std::string Name = getName({EntryFnName, "region_id"}); 6530 OutlinedFnID = new llvm::GlobalVariable( 6531 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6532 llvm::GlobalValue::WeakAnyLinkage, 6533 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6534 } 6535 6536 // Register the information for the entry associated with this target region. 6537 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6538 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6539 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6540 } 6541 6542 /// Checks if the expression is constant or does not have non-trivial function 6543 /// calls. 6544 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6545 // We can skip constant expressions. 6546 // We can skip expressions with trivial calls or simple expressions. 6547 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6548 !E->hasNonTrivialCall(Ctx)) && 6549 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6550 } 6551 6552 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6553 const Stmt *Body) { 6554 const Stmt *Child = Body->IgnoreContainers(); 6555 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6556 Child = nullptr; 6557 for (const Stmt *S : C->body()) { 6558 if (const auto *E = dyn_cast<Expr>(S)) { 6559 if (isTrivial(Ctx, E)) 6560 continue; 6561 } 6562 // Some of the statements can be ignored. 6563 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6564 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6565 continue; 6566 // Analyze declarations. 
6567 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6568 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6569 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6570 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6571 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6572 isa<UsingDirectiveDecl>(D) || 6573 isa<OMPDeclareReductionDecl>(D) || 6574 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6575 return true; 6576 const auto *VD = dyn_cast<VarDecl>(D); 6577 if (!VD) 6578 return false; 6579 return VD->isConstexpr() || 6580 ((VD->getType().isTrivialType(Ctx) || 6581 VD->getType()->isReferenceType()) && 6582 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6583 })) 6584 continue; 6585 } 6586 // Found multiple children - cannot get the one child only. 6587 if (Child) 6588 return nullptr; 6589 Child = S; 6590 } 6591 if (Child) 6592 Child = Child->IgnoreContainers(); 6593 } 6594 return Child; 6595 } 6596 6597 /// Emit the number of teams for a target directive. Inspect the num_teams 6598 /// clause associated with a teams construct combined or closely nested 6599 /// with the target directive. 6600 /// 6601 /// Emit a team of size one for directives such as 'target parallel' that 6602 /// have no associated teams construct. 6603 /// 6604 /// Otherwise, return nullptr. 
6605 static llvm::Value * 6606 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6607 const OMPExecutableDirective &D) { 6608 assert(!CGF.getLangOpts().OpenMPIsDevice && 6609 "Clauses associated with the teams directive expected to be emitted " 6610 "only for the host!"); 6611 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6612 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6613 "Expected target-based executable directive."); 6614 CGBuilderTy &Bld = CGF.Builder; 6615 switch (DirectiveKind) { 6616 case OMPD_target: { 6617 const auto *CS = D.getInnermostCapturedStmt(); 6618 const auto *Body = 6619 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6620 const Stmt *ChildStmt = 6621 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6622 if (const auto *NestedDir = 6623 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6624 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6625 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6626 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6627 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6628 const Expr *NumTeams = 6629 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6630 llvm::Value *NumTeamsVal = 6631 CGF.EmitScalarExpr(NumTeams, 6632 /*IgnoreResultAssign*/ true); 6633 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6634 /*isSigned=*/true); 6635 } 6636 return Bld.getInt32(0); 6637 } 6638 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6639 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6640 return Bld.getInt32(1); 6641 return Bld.getInt32(0); 6642 } 6643 return nullptr; 6644 } 6645 case OMPD_target_teams: 6646 case OMPD_target_teams_distribute: 6647 case OMPD_target_teams_distribute_simd: 6648 case OMPD_target_teams_distribute_parallel_for: 6649 case OMPD_target_teams_distribute_parallel_for_simd: { 6650 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6651 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6652 const Expr *NumTeams = 6653 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6654 llvm::Value *NumTeamsVal = 6655 CGF.EmitScalarExpr(NumTeams, 6656 /*IgnoreResultAssign*/ true); 6657 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6658 /*isSigned=*/true); 6659 } 6660 return Bld.getInt32(0); 6661 } 6662 case OMPD_target_parallel: 6663 case OMPD_target_parallel_for: 6664 case OMPD_target_parallel_for_simd: 6665 case OMPD_target_simd: 6666 return Bld.getInt32(1); 6667 case OMPD_parallel: 6668 case OMPD_for: 6669 case OMPD_parallel_for: 6670 case OMPD_parallel_master: 6671 case OMPD_parallel_sections: 6672 case OMPD_for_simd: 6673 case OMPD_parallel_for_simd: 6674 case OMPD_cancel: 6675 case OMPD_cancellation_point: 6676 case OMPD_ordered: 6677 case OMPD_threadprivate: 6678 case OMPD_allocate: 6679 case OMPD_task: 6680 case OMPD_simd: 6681 case OMPD_tile: 6682 case OMPD_sections: 6683 case OMPD_section: 6684 case OMPD_single: 6685 case OMPD_master: 6686 case OMPD_critical: 6687 case OMPD_taskyield: 6688 case OMPD_barrier: 6689 case OMPD_taskwait: 6690 case OMPD_taskgroup: 6691 case OMPD_atomic: 6692 case OMPD_flush: 6693 case OMPD_depobj: 6694 case OMPD_scan: 6695 case OMPD_teams: 6696 case OMPD_target_data: 6697 case OMPD_target_exit_data: 6698 case OMPD_target_enter_data: 6699 case OMPD_distribute: 6700 case OMPD_distribute_simd: 6701 case OMPD_distribute_parallel_for: 6702 case OMPD_distribute_parallel_for_simd: 6703 case OMPD_teams_distribute: 6704 case OMPD_teams_distribute_simd: 6705 case OMPD_teams_distribute_parallel_for: 6706 case OMPD_teams_distribute_parallel_for_simd: 6707 case OMPD_target_update: 6708 case OMPD_declare_simd: 6709 case OMPD_declare_variant: 6710 case OMPD_begin_declare_variant: 6711 case OMPD_end_declare_variant: 6712 case OMPD_declare_target: 6713 case OMPD_end_declare_target: 6714 case OMPD_declare_reduction: 6715 case OMPD_declare_mapper: 6716 case OMPD_taskloop: 6717 case OMPD_taskloop_simd: 6718 case 
OMPD_master_taskloop: 6719 case OMPD_master_taskloop_simd: 6720 case OMPD_parallel_master_taskloop: 6721 case OMPD_parallel_master_taskloop_simd: 6722 case OMPD_requires: 6723 case OMPD_unknown: 6724 break; 6725 default: 6726 break; 6727 } 6728 llvm_unreachable("Unexpected directive kind."); 6729 } 6730 6731 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6732 llvm::Value *DefaultThreadLimitVal) { 6733 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6734 CGF.getContext(), CS->getCapturedStmt()); 6735 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6736 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6737 llvm::Value *NumThreads = nullptr; 6738 llvm::Value *CondVal = nullptr; 6739 // Handle if clause. If if clause present, the number of threads is 6740 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6741 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6742 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6743 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6744 const OMPIfClause *IfClause = nullptr; 6745 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6746 if (C->getNameModifier() == OMPD_unknown || 6747 C->getNameModifier() == OMPD_parallel) { 6748 IfClause = C; 6749 break; 6750 } 6751 } 6752 if (IfClause) { 6753 const Expr *Cond = IfClause->getCondition(); 6754 bool Result; 6755 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6756 if (!Result) 6757 return CGF.Builder.getInt32(1); 6758 } else { 6759 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6760 if (const auto *PreInit = 6761 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6762 for (const auto *I : PreInit->decls()) { 6763 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6764 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6765 } else { 6766 CodeGenFunction::AutoVarEmission Emission = 6767 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6768 CGF.EmitAutoVarCleanups(Emission); 6769 
} 6770 } 6771 } 6772 CondVal = CGF.EvaluateExprAsBool(Cond); 6773 } 6774 } 6775 } 6776 // Check the value of num_threads clause iff if clause was not specified 6777 // or is not evaluated to false. 6778 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6779 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6780 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6781 const auto *NumThreadsClause = 6782 Dir->getSingleClause<OMPNumThreadsClause>(); 6783 CodeGenFunction::LexicalScope Scope( 6784 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6785 if (const auto *PreInit = 6786 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6787 for (const auto *I : PreInit->decls()) { 6788 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6789 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6790 } else { 6791 CodeGenFunction::AutoVarEmission Emission = 6792 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6793 CGF.EmitAutoVarCleanups(Emission); 6794 } 6795 } 6796 } 6797 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6798 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6799 /*isSigned=*/false); 6800 if (DefaultThreadLimitVal) 6801 NumThreads = CGF.Builder.CreateSelect( 6802 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6803 DefaultThreadLimitVal, NumThreads); 6804 } else { 6805 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6806 : CGF.Builder.getInt32(0); 6807 } 6808 // Process condition of the if clause. 6809 if (CondVal) { 6810 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6811 CGF.Builder.getInt32(1)); 6812 } 6813 return NumThreads; 6814 } 6815 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6816 return CGF.Builder.getInt32(1); 6817 return DefaultThreadLimitVal; 6818 } 6819 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6820 : CGF.Builder.getInt32(0); 6821 } 6822 6823 /// Emit the number of threads for a target directive. 
Inspect the 6824 /// thread_limit clause associated with a teams construct combined or closely 6825 /// nested with the target directive. 6826 /// 6827 /// Emit the num_threads clause for directives such as 'target parallel' that 6828 /// have no associated teams construct. 6829 /// 6830 /// Otherwise, return nullptr. 6831 static llvm::Value * 6832 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6833 const OMPExecutableDirective &D) { 6834 assert(!CGF.getLangOpts().OpenMPIsDevice && 6835 "Clauses associated with the teams directive expected to be emitted " 6836 "only for the host!"); 6837 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6838 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6839 "Expected target-based executable directive."); 6840 CGBuilderTy &Bld = CGF.Builder; 6841 llvm::Value *ThreadLimitVal = nullptr; 6842 llvm::Value *NumThreadsVal = nullptr; 6843 switch (DirectiveKind) { 6844 case OMPD_target: { 6845 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6846 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6847 return NumThreads; 6848 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6849 CGF.getContext(), CS->getCapturedStmt()); 6850 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6851 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6852 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6853 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6854 const auto *ThreadLimitClause = 6855 Dir->getSingleClause<OMPThreadLimitClause>(); 6856 CodeGenFunction::LexicalScope Scope( 6857 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6858 if (const auto *PreInit = 6859 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6860 for (const auto *I : PreInit->decls()) { 6861 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6862 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6863 } else { 6864 CodeGenFunction::AutoVarEmission Emission = 6865 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6866 CGF.EmitAutoVarCleanups(Emission); 6867 } 6868 } 6869 } 6870 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6871 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6872 ThreadLimitVal = 6873 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6874 } 6875 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6876 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6877 CS = Dir->getInnermostCapturedStmt(); 6878 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6879 CGF.getContext(), CS->getCapturedStmt()); 6880 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6881 } 6882 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6883 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6884 CS = Dir->getInnermostCapturedStmt(); 6885 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6886 return NumThreads; 6887 } 6888 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6889 return Bld.getInt32(1); 6890 } 6891 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6892 } 6893 case OMPD_target_teams: { 6894 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6895 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6896 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6897 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6898 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6899 ThreadLimitVal = 6900 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6901 } 6902 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6903 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6904 return NumThreads; 6905 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6906 CGF.getContext(), CS->getCapturedStmt()); 6907 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6908 if (Dir->getDirectiveKind() == OMPD_distribute) { 6909 CS = Dir->getInnermostCapturedStmt(); 6910 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6911 return NumThreads; 6912 } 6913 } 6914 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6915 } 6916 case OMPD_target_teams_distribute: 6917 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6918 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6919 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6920 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6921 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6922 ThreadLimitVal = 6923 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6924 } 6925 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6926 case OMPD_target_parallel: 6927 case OMPD_target_parallel_for: 6928 case OMPD_target_parallel_for_simd: 6929 case OMPD_target_teams_distribute_parallel_for: 6930 case OMPD_target_teams_distribute_parallel_for_simd: { 6931 llvm::Value *CondVal = nullptr; 6932 // Handle if clause. 
If if clause present, the number of threads is 6933 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6934 if (D.hasClausesOfKind<OMPIfClause>()) { 6935 const OMPIfClause *IfClause = nullptr; 6936 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6937 if (C->getNameModifier() == OMPD_unknown || 6938 C->getNameModifier() == OMPD_parallel) { 6939 IfClause = C; 6940 break; 6941 } 6942 } 6943 if (IfClause) { 6944 const Expr *Cond = IfClause->getCondition(); 6945 bool Result; 6946 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6947 if (!Result) 6948 return Bld.getInt32(1); 6949 } else { 6950 CodeGenFunction::RunCleanupsScope Scope(CGF); 6951 CondVal = CGF.EvaluateExprAsBool(Cond); 6952 } 6953 } 6954 } 6955 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6956 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6957 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6958 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6959 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6960 ThreadLimitVal = 6961 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6962 } 6963 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6964 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6965 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6966 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6967 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6968 NumThreadsVal = 6969 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6970 ThreadLimitVal = ThreadLimitVal 6971 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6972 ThreadLimitVal), 6973 NumThreadsVal, ThreadLimitVal) 6974 : NumThreadsVal; 6975 } 6976 if (!ThreadLimitVal) 6977 ThreadLimitVal = Bld.getInt32(0); 6978 if (CondVal) 6979 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6980 return ThreadLimitVal; 6981 } 6982 case OMPD_target_teams_distribute_simd: 6983 case OMPD_target_simd: 6984 return Bld.getInt32(1); 6985 case OMPD_parallel: 6986 case OMPD_for: 6987 case OMPD_parallel_for: 6988 case OMPD_parallel_master: 6989 case OMPD_parallel_sections: 6990 case OMPD_for_simd: 6991 case OMPD_parallel_for_simd: 6992 case OMPD_cancel: 6993 case OMPD_cancellation_point: 6994 case OMPD_ordered: 6995 case OMPD_threadprivate: 6996 case OMPD_allocate: 6997 case OMPD_task: 6998 case OMPD_simd: 6999 case OMPD_tile: 7000 case OMPD_sections: 7001 case OMPD_section: 7002 case OMPD_single: 7003 case OMPD_master: 7004 case OMPD_critical: 7005 case OMPD_taskyield: 7006 case OMPD_barrier: 7007 case OMPD_taskwait: 7008 case OMPD_taskgroup: 7009 case OMPD_atomic: 7010 case OMPD_flush: 7011 case OMPD_depobj: 7012 case OMPD_scan: 7013 case OMPD_teams: 7014 case OMPD_target_data: 7015 case OMPD_target_exit_data: 7016 case OMPD_target_enter_data: 7017 case OMPD_distribute: 7018 case OMPD_distribute_simd: 7019 case OMPD_distribute_parallel_for: 7020 case OMPD_distribute_parallel_for_simd: 7021 case OMPD_teams_distribute: 7022 case OMPD_teams_distribute_simd: 7023 case OMPD_teams_distribute_parallel_for: 7024 case OMPD_teams_distribute_parallel_for_simd: 7025 case OMPD_target_update: 7026 case OMPD_declare_simd: 7027 case OMPD_declare_variant: 7028 case OMPD_begin_declare_variant: 7029 case OMPD_end_declare_variant: 7030 case OMPD_declare_target: 7031 case OMPD_end_declare_target: 7032 case OMPD_declare_reduction: 7033 case OMPD_declare_mapper: 7034 case OMPD_taskloop: 7035 case OMPD_taskloop_simd: 7036 case OMPD_master_taskloop: 7037 case OMPD_master_taskloop_simd: 7038 case 
OMPD_parallel_master_taskloop: 7039 case OMPD_parallel_master_taskloop_simd: 7040 case OMPD_requires: 7041 case OMPD_unknown: 7042 break; 7043 default: 7044 break; 7045 } 7046 llvm_unreachable("Unsupported directive kind."); 7047 } 7048 7049 namespace { 7050 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7051 7052 // Utility to handle information from clauses associated with a given 7053 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7054 // It provides a convenient interface to obtain the information and generate 7055 // code for that information. 7056 class MappableExprsHandler { 7057 public: 7058 /// Values for bit flags used to specify the mapping type for 7059 /// offloading. 7060 enum OpenMPOffloadMappingFlags : uint64_t { 7061 /// No flags 7062 OMP_MAP_NONE = 0x0, 7063 /// Allocate memory on the device and move data from host to device. 7064 OMP_MAP_TO = 0x01, 7065 /// Allocate memory on the device and move data from device to host. 7066 OMP_MAP_FROM = 0x02, 7067 /// Always perform the requested mapping action on the element, even 7068 /// if it was already mapped before. 7069 OMP_MAP_ALWAYS = 0x04, 7070 /// Delete the element from the device environment, ignoring the 7071 /// current reference count associated with the element. 7072 OMP_MAP_DELETE = 0x08, 7073 /// The element being mapped is a pointer-pointee pair; both the 7074 /// pointer and the pointee should be mapped. 7075 OMP_MAP_PTR_AND_OBJ = 0x10, 7076 /// This flags signals that the base address of an entry should be 7077 /// passed to the target kernel as an argument. 7078 OMP_MAP_TARGET_PARAM = 0x20, 7079 /// Signal that the runtime library has to return the device pointer 7080 /// in the current position for the data being mapped. Used when we have the 7081 /// use_device_ptr or use_device_addr clause. 7082 OMP_MAP_RETURN_PARAM = 0x40, 7083 /// This flag signals that the reference being passed is a pointer to 7084 /// private data. 
7085 OMP_MAP_PRIVATE = 0x80, 7086 /// Pass the element to the device by value. 7087 OMP_MAP_LITERAL = 0x100, 7088 /// Implicit map 7089 OMP_MAP_IMPLICIT = 0x200, 7090 /// Close is a hint to the runtime to allocate memory close to 7091 /// the target device. 7092 OMP_MAP_CLOSE = 0x400, 7093 /// 0x800 is reserved for compatibility with XLC. 7094 /// Produce a runtime error if the data is not already allocated. 7095 OMP_MAP_PRESENT = 0x1000, 7096 /// Signal that the runtime library should use args as an array of 7097 /// descriptor_dim pointers and use args_size as dims. Used when we have 7098 /// non-contiguous list items in target update directive 7099 OMP_MAP_NON_CONTIG = 0x100000000000, 7100 /// The 16 MSBs of the flags indicate whether the entry is member of some 7101 /// struct/class. 7102 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7103 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7104 }; 7105 7106 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7107 static unsigned getFlagMemberOffset() { 7108 unsigned Offset = 0; 7109 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7110 Remain = Remain >> 1) 7111 Offset++; 7112 return Offset; 7113 } 7114 7115 /// Class that holds debugging information for a data mapping to be passed to 7116 /// the runtime library. 7117 class MappingExprInfo { 7118 /// The variable declaration used for the data mapping. 7119 const ValueDecl *MapDecl = nullptr; 7120 /// The original expression used in the map clause, or null if there is 7121 /// none. 7122 const Expr *MapExpr = nullptr; 7123 7124 public: 7125 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7126 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7127 7128 const ValueDecl *getMapDecl() const { return MapDecl; } 7129 const Expr *getMapExpr() const { return MapExpr; } 7130 }; 7131 7132 /// Class that associates information with a base pointer to be passed to the 7133 /// runtime library. 
7134 class BasePointerInfo { 7135 /// The base pointer. 7136 llvm::Value *Ptr = nullptr; 7137 /// The base declaration that refers to this device pointer, or null if 7138 /// there is none. 7139 const ValueDecl *DevPtrDecl = nullptr; 7140 7141 public: 7142 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7143 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7144 llvm::Value *operator*() const { return Ptr; } 7145 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7146 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7147 }; 7148 7149 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7150 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7151 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7152 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7153 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7154 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7155 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7156 7157 /// This structure contains combined information generated for mappable 7158 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7159 /// mappers, and non-contiguous information. 7160 struct MapCombinedInfoTy { 7161 struct StructNonContiguousInfo { 7162 bool IsNonContiguous = false; 7163 MapDimArrayTy Dims; 7164 MapNonContiguousArrayTy Offsets; 7165 MapNonContiguousArrayTy Counts; 7166 MapNonContiguousArrayTy Strides; 7167 }; 7168 MapExprsArrayTy Exprs; 7169 MapBaseValuesArrayTy BasePointers; 7170 MapValuesArrayTy Pointers; 7171 MapValuesArrayTy Sizes; 7172 MapFlagsArrayTy Types; 7173 MapMappersArrayTy Mappers; 7174 StructNonContiguousInfo NonContigInfo; 7175 7176 /// Append arrays in \a CurInfo. 
7177 void append(MapCombinedInfoTy &CurInfo) { 7178 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7179 BasePointers.append(CurInfo.BasePointers.begin(), 7180 CurInfo.BasePointers.end()); 7181 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7182 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7183 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7184 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7185 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7186 CurInfo.NonContigInfo.Dims.end()); 7187 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7188 CurInfo.NonContigInfo.Offsets.end()); 7189 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7190 CurInfo.NonContigInfo.Counts.end()); 7191 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7192 CurInfo.NonContigInfo.Strides.end()); 7193 } 7194 }; 7195 7196 /// Map between a struct and the its lowest & highest elements which have been 7197 /// mapped. 7198 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7199 /// HE(FieldIndex, Pointer)} 7200 struct StructRangeInfoTy { 7201 MapCombinedInfoTy PreliminaryMapData; 7202 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7203 0, Address::invalid()}; 7204 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7205 0, Address::invalid()}; 7206 Address Base = Address::invalid(); 7207 Address LB = Address::invalid(); 7208 bool IsArraySection = false; 7209 bool HasCompleteRecord = false; 7210 }; 7211 7212 private: 7213 /// Kind that defines how a device pointer has to be returned. 
7214 struct MapInfo { 7215 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7216 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7217 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7218 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7219 bool ReturnDevicePointer = false; 7220 bool IsImplicit = false; 7221 const ValueDecl *Mapper = nullptr; 7222 const Expr *VarRef = nullptr; 7223 bool ForDeviceAddr = false; 7224 7225 MapInfo() = default; 7226 MapInfo( 7227 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7228 OpenMPMapClauseKind MapType, 7229 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7230 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7231 bool ReturnDevicePointer, bool IsImplicit, 7232 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7233 bool ForDeviceAddr = false) 7234 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7235 MotionModifiers(MotionModifiers), 7236 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7237 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7238 }; 7239 7240 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7241 /// member and there is no map information about it, then emission of that 7242 /// entry is deferred until the whole struct has been processed. 7243 struct DeferredDevicePtrEntryTy { 7244 const Expr *IE = nullptr; 7245 const ValueDecl *VD = nullptr; 7246 bool ForDeviceAddr = false; 7247 7248 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7249 bool ForDeviceAddr) 7250 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7251 }; 7252 7253 /// The target directive from where the mappable clauses were extracted. It 7254 /// is either a executable directive or a user-defined mapper directive. 7255 llvm::PointerUnion<const OMPExecutableDirective *, 7256 const OMPDeclareMapperDecl *> 7257 CurDir; 7258 7259 /// Function the directive is being generated for. 
7260 CodeGenFunction &CGF; 7261 7262 /// Set of all first private variables in the current directive. 7263 /// bool data is set to true if the variable is implicitly marked as 7264 /// firstprivate, false otherwise. 7265 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7266 7267 /// Map between device pointer declarations and their expression components. 7268 /// The key value for declarations in 'this' is null. 7269 llvm::DenseMap< 7270 const ValueDecl *, 7271 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7272 DevPointersMap; 7273 7274 llvm::Value *getExprTypeSize(const Expr *E) const { 7275 QualType ExprTy = E->getType().getCanonicalType(); 7276 7277 // Calculate the size for array shaping expression. 7278 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7279 llvm::Value *Size = 7280 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7281 for (const Expr *SE : OAE->getDimensions()) { 7282 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7283 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7284 CGF.getContext().getSizeType(), 7285 SE->getExprLoc()); 7286 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7287 } 7288 return Size; 7289 } 7290 7291 // Reference types are ignored for mapping purposes. 7292 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7293 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7294 7295 // Given that an array section is considered a built-in type, we need to 7296 // do the calculation based on the length of the section instead of relying 7297 // on CGF.getTypeSize(E->getType()). 7298 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7299 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7300 OAE->getBase()->IgnoreParenImpCasts()) 7301 .getCanonicalType(); 7302 7303 // If there is no length associated with the expression and lower bound is 7304 // not specified too, that means we are using the whole length of the 7305 // base. 
7306 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7307 !OAE->getLowerBound()) 7308 return CGF.getTypeSize(BaseTy); 7309 7310 llvm::Value *ElemSize; 7311 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7312 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7313 } else { 7314 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7315 assert(ATy && "Expecting array type if not a pointer type."); 7316 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7317 } 7318 7319 // If we don't have a length at this point, that is because we have an 7320 // array section with a single element. 7321 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7322 return ElemSize; 7323 7324 if (const Expr *LenExpr = OAE->getLength()) { 7325 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7326 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7327 CGF.getContext().getSizeType(), 7328 LenExpr->getExprLoc()); 7329 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7330 } 7331 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7332 OAE->getLowerBound() && "expected array_section[lb:]."); 7333 // Size = sizetype - lb * elemtype; 7334 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7335 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7336 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7337 CGF.getContext().getSizeType(), 7338 OAE->getLowerBound()->getExprLoc()); 7339 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7340 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7341 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7342 LengthVal = CGF.Builder.CreateSelect( 7343 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7344 return LengthVal; 7345 } 7346 return CGF.getTypeSize(ExprTy); 7347 } 7348 7349 /// Return the corresponding bits for a given map clause modifier. 
Add 7350 /// a flag marking the map as a pointer if requested. Add a flag marking the 7351 /// map as the first one of a series of maps that relate to the same map 7352 /// expression. 7353 OpenMPOffloadMappingFlags getMapTypeBits( 7354 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7355 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7356 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7357 OpenMPOffloadMappingFlags Bits = 7358 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7359 switch (MapType) { 7360 case OMPC_MAP_alloc: 7361 case OMPC_MAP_release: 7362 // alloc and release is the default behavior in the runtime library, i.e. 7363 // if we don't pass any bits alloc/release that is what the runtime is 7364 // going to do. Therefore, we don't need to signal anything for these two 7365 // type modifiers. 7366 break; 7367 case OMPC_MAP_to: 7368 Bits |= OMP_MAP_TO; 7369 break; 7370 case OMPC_MAP_from: 7371 Bits |= OMP_MAP_FROM; 7372 break; 7373 case OMPC_MAP_tofrom: 7374 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7375 break; 7376 case OMPC_MAP_delete: 7377 Bits |= OMP_MAP_DELETE; 7378 break; 7379 case OMPC_MAP_unknown: 7380 llvm_unreachable("Unexpected map type!"); 7381 } 7382 if (AddPtrFlag) 7383 Bits |= OMP_MAP_PTR_AND_OBJ; 7384 if (AddIsTargetParamFlag) 7385 Bits |= OMP_MAP_TARGET_PARAM; 7386 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7387 != MapModifiers.end()) 7388 Bits |= OMP_MAP_ALWAYS; 7389 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7390 != MapModifiers.end()) 7391 Bits |= OMP_MAP_CLOSE; 7392 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7393 MapModifiers.end() || 7394 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7395 MotionModifiers.end()) 7396 Bits |= OMP_MAP_PRESENT; 7397 if (IsNonContiguous) 7398 Bits |= OMP_MAP_NON_CONTIG; 7399 return Bits; 7400 } 7401 7402 /// Return true if the provided expression is a final array section. 
A 7403 /// final array section, is one whose length can't be proved to be one. 7404 bool isFinalArraySectionExpression(const Expr *E) const { 7405 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7406 7407 // It is not an array section and therefore not a unity-size one. 7408 if (!OASE) 7409 return false; 7410 7411 // An array section with no colon always refer to a single element. 7412 if (OASE->getColonLocFirst().isInvalid()) 7413 return false; 7414 7415 const Expr *Length = OASE->getLength(); 7416 7417 // If we don't have a length we have to check if the array has size 1 7418 // for this dimension. Also, we should always expect a length if the 7419 // base type is pointer. 7420 if (!Length) { 7421 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7422 OASE->getBase()->IgnoreParenImpCasts()) 7423 .getCanonicalType(); 7424 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7425 return ATy->getSize().getSExtValue() != 1; 7426 // If we don't have a constant dimension length, we have to consider 7427 // the current section as having any size, so it is not necessarily 7428 // unitary. If it happen to be unity size, that's user fault. 7429 return true; 7430 } 7431 7432 // Check if the length evaluates to 1. 7433 Expr::EvalResult Result; 7434 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7435 return true; // Can have more that size 1. 7436 7437 llvm::APSInt ConstLength = Result.Val.getInt(); 7438 return ConstLength.getSExtValue() != 1; 7439 } 7440 7441 /// Generate the base pointers, section pointers, sizes, map type bits, and 7442 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7443 /// map type, map or motion modifiers, and expression components. 7444 /// \a IsFirstComponent should be set to true if the provided set of 7445 /// components is the first associated with a capture. 
7446 void generateInfoForComponentList( 7447 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7448 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7449 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7450 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7451 bool IsFirstComponentList, bool IsImplicit, 7452 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7453 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7454 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7455 OverlappedElements = llvm::None) const { 7456 // The following summarizes what has to be generated for each map and the 7457 // types below. The generated information is expressed in this order: 7458 // base pointer, section pointer, size, flags 7459 // (to add to the ones that come from the map type and modifier). 7460 // 7461 // double d; 7462 // int i[100]; 7463 // float *p; 7464 // 7465 // struct S1 { 7466 // int i; 7467 // float f[50]; 7468 // } 7469 // struct S2 { 7470 // int i; 7471 // float f[50]; 7472 // S1 s; 7473 // double *p; 7474 // struct S2 *ps; 7475 // int &ref; 7476 // } 7477 // S2 s; 7478 // S2 *ps; 7479 // 7480 // map(d) 7481 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7482 // 7483 // map(i) 7484 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7485 // 7486 // map(i[1:23]) 7487 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7488 // 7489 // map(p) 7490 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7491 // 7492 // map(p[1:24]) 7493 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7494 // in unified shared memory mode or for local pointers 7495 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7496 // 7497 // map(s) 7498 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7499 // 7500 // map(s.i) 7501 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7502 // 7503 // map(s.s.f) 7504 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7505 // 7506 // map(s.p) 7507 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7508 // 7509 // map(to: s.p[:22]) 7510 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7511 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7512 // &(s.p), &(s.p[0]), 22*sizeof(double), 7513 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7514 // (*) alloc space for struct members, only this is a target parameter 7515 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7516 // optimizes this entry out, same in the examples below) 7517 // (***) map the pointee (map: to) 7518 // 7519 // map(to: s.ref) 7520 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7521 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7522 // (*) alloc space for struct members, only this is a target parameter 7523 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7524 // optimizes this entry out, same in the examples below) 7525 // (***) map the pointee (map: to) 7526 // 7527 // map(s.ps) 7528 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7529 // 7530 // map(from: s.ps->s.i) 7531 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7532 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7533 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7534 // 7535 // map(to: s.ps->ps) 7536 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7537 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7538 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7539 // 7540 // map(s.ps->ps->ps) 7541 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7542 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7543 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7544 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7545 // 7546 // map(to: s.ps->ps->s.f[:22]) 7547 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7548 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7549 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7550 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7551 // 7552 // map(ps) 7553 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7554 // 7555 // map(ps->i) 7556 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7557 // 7558 // map(ps->s.f) 7559 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7560 // 7561 // map(from: ps->p) 7562 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7563 // 7564 // map(to: ps->p[:22]) 7565 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7566 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7567 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7568 // 7569 // map(ps->ps) 7570 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7571 // 7572 // map(from: ps->ps->s.i) 7573 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7574 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7575 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7576 // 7577 // map(from: ps->ps->ps) 7578 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7579 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7580 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7581 // 7582 // map(ps->ps->ps->ps) 7583 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7584 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7585 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7586 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7587 // 7588 // map(to: ps->ps->ps->s.f[:22]) 7589 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7590 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7591 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7592 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7593 // 7594 // map(to: s.f[:22]) map(from: s.p[:33]) 7595 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7596 // sizeof(double*) (**), TARGET_PARAM 7597 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7598 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7599 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7600 // (*) allocate contiguous space needed to fit all mapped members even if 7601 // we allocate space for members not mapped (in this example, 7602 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7603 // them as well because they fall between &s.f[0] and &s.p) 7604 // 7605 // map(from: s.f[:22]) map(to: ps->p[:33]) 7606 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7607 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7608 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7609 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7610 // (*) the struct this entry pertains to is the 2nd element in the list of 7611 // arguments, hence MEMBER_OF(2) 7612 // 7613 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7614 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7615 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7616 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7617 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7618 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7619 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7620 // (*) the struct this entry pertains to is the 4th element in the list 7621 // of arguments, hence MEMBER_OF(4) 7622 7623 // Track if the map information being generated is the first for a capture. 7624 bool IsCaptureFirstInfo = IsFirstComponentList; 7625 // When the variable is on a declare target link or in a to clause with 7626 // unified memory, a reference is needed to hold the host/device address 7627 // of the variable. 7628 bool RequiresReference = false; 7629 7630 // Scan the components from the base to the complete expression. 
7631 auto CI = Components.rbegin(); 7632 auto CE = Components.rend(); 7633 auto I = CI; 7634 7635 // Track if the map information being generated is the first for a list of 7636 // components. 7637 bool IsExpressionFirstInfo = true; 7638 bool FirstPointerInComplexData = false; 7639 Address BP = Address::invalid(); 7640 const Expr *AssocExpr = I->getAssociatedExpression(); 7641 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7642 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7643 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7644 7645 if (isa<MemberExpr>(AssocExpr)) { 7646 // The base is the 'this' pointer. The content of the pointer is going 7647 // to be the base of the field being mapped. 7648 BP = CGF.LoadCXXThisAddress(); 7649 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7650 (OASE && 7651 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7652 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7653 } else if (OAShE && 7654 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7655 BP = Address( 7656 CGF.EmitScalarExpr(OAShE->getBase()), 7657 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7658 } else { 7659 // The base is the reference to the variable. 7660 // BP = &Var. 7661 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7662 if (const auto *VD = 7663 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7664 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7665 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7666 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7667 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7668 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7669 RequiresReference = true; 7670 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7671 } 7672 } 7673 } 7674 7675 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7676 // the last component), the base has to be the pointer itself, not its 7677 // reference. References are ignored for mapping purposes. 7678 QualType Ty = 7679 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7680 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7681 // No need to generate individual map information for the pointer, it 7682 // can be associated with the combined storage if shared memory mode is 7683 // active or the base declaration is not global variable. 7684 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7685 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7686 !VD || VD->hasLocalStorage()) 7687 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7688 else 7689 FirstPointerInComplexData = true; 7690 ++I; 7691 } 7692 } 7693 7694 // Track whether a component of the list should be marked as MEMBER_OF some 7695 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7696 // in a component list should be marked as MEMBER_OF, all subsequent entries 7697 // do not belong to the base struct. E.g. 7698 // struct S2 s; 7699 // s.ps->ps->ps->f[:] 7700 // (1) (2) (3) (4) 7701 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7702 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7703 // is the pointee of ps(2) which is not member of struct s, so it should not 7704 // be marked as such (it is still PTR_AND_OBJ). 7705 // The variable is initialized to false so that PTR_AND_OBJ entries which 7706 // are not struct members are not considered (e.g. array of pointers to 7707 // data). 7708 bool ShouldBeMemberOf = false; 7709 7710 // Variable keeping track of whether or not we have encountered a component 7711 // in the component list which is a member expression. 
Useful when we have a 7712 // pointer or a final array section, in which case it is the previous 7713 // component in the list which tells us whether we have a member expression. 7714 // E.g. X.f[:] 7715 // While processing the final array section "[:]" it is "f" which tells us 7716 // whether we are dealing with a member of a declared struct. 7717 const MemberExpr *EncounteredME = nullptr; 7718 7719 // Track for the total number of dimension. Start from one for the dummy 7720 // dimension. 7721 uint64_t DimSize = 1; 7722 7723 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7724 bool IsPrevMemberReference = false; 7725 7726 for (; I != CE; ++I) { 7727 // If the current component is member of a struct (parent struct) mark it. 7728 if (!EncounteredME) { 7729 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7730 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7731 // as MEMBER_OF the parent struct. 7732 if (EncounteredME) { 7733 ShouldBeMemberOf = true; 7734 // Do not emit as complex pointer if this is actually not array-like 7735 // expression. 7736 if (FirstPointerInComplexData) { 7737 QualType Ty = std::prev(I) 7738 ->getAssociatedDeclaration() 7739 ->getType() 7740 .getNonReferenceType(); 7741 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7742 FirstPointerInComplexData = false; 7743 } 7744 } 7745 } 7746 7747 auto Next = std::next(I); 7748 7749 // We need to generate the addresses and sizes if this is the last 7750 // component, if the component is a pointer or if it is an array section 7751 // whose length can't be proved to be one. If this is a pointer, it 7752 // becomes the base address for the following components. 7753 7754 // A final array section, is one whose length can't be proved to be one. 7755 // If the map item is non-contiguous then we don't treat any array section 7756 // as final array section. 
7757 bool IsFinalArraySection = 7758 !IsNonContiguous && 7759 isFinalArraySectionExpression(I->getAssociatedExpression()); 7760 7761 // If we have a declaration for the mapping use that, otherwise use 7762 // the base declaration of the map clause. 7763 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7764 ? I->getAssociatedDeclaration() 7765 : BaseDecl; 7766 7767 // Get information on whether the element is a pointer. Have to do a 7768 // special treatment for array sections given that they are built-in 7769 // types. 7770 const auto *OASE = 7771 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7772 const auto *OAShE = 7773 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7774 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7775 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7776 bool IsPointer = 7777 OAShE || 7778 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7779 .getCanonicalType() 7780 ->isAnyPointerType()) || 7781 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7782 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7783 MapDecl && 7784 MapDecl->getType()->isLValueReferenceType(); 7785 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7786 7787 if (OASE) 7788 ++DimSize; 7789 7790 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7791 IsFinalArraySection) { 7792 // If this is not the last component, we expect the pointer to be 7793 // associated with an array expression or member expression. 
7794 assert((Next == CE || 7795 isa<MemberExpr>(Next->getAssociatedExpression()) || 7796 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7797 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7798 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7799 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7800 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7801 "Unexpected expression"); 7802 7803 Address LB = Address::invalid(); 7804 Address LowestElem = Address::invalid(); 7805 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7806 const MemberExpr *E) { 7807 const Expr *BaseExpr = E->getBase(); 7808 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7809 // scalar. 7810 LValue BaseLV; 7811 if (E->isArrow()) { 7812 LValueBaseInfo BaseInfo; 7813 TBAAAccessInfo TBAAInfo; 7814 Address Addr = 7815 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7816 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7817 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7818 } else { 7819 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7820 } 7821 return BaseLV; 7822 }; 7823 if (OAShE) { 7824 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7825 CGF.getContext().getTypeAlignInChars( 7826 OAShE->getBase()->getType())); 7827 } else if (IsMemberReference) { 7828 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7829 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7830 LowestElem = CGF.EmitLValueForFieldInitialization( 7831 BaseLVal, cast<FieldDecl>(MapDecl)) 7832 .getAddress(CGF); 7833 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7834 .getAddress(CGF); 7835 } else { 7836 LowestElem = LB = 7837 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7838 .getAddress(CGF); 7839 } 7840 7841 // If this component is a pointer inside the base struct then we don't 7842 // need to create any entry for it - it will be combined with the object 7843 // it is 
pointing to into a single PTR_AND_OBJ entry. 7844 bool IsMemberPointerOrAddr = 7845 EncounteredME && 7846 (((IsPointer || ForDeviceAddr) && 7847 I->getAssociatedExpression() == EncounteredME) || 7848 (IsPrevMemberReference && !IsPointer) || 7849 (IsMemberReference && Next != CE && 7850 !Next->getAssociatedExpression()->getType()->isPointerType())); 7851 if (!OverlappedElements.empty() && Next == CE) { 7852 // Handle base element with the info for overlapped elements. 7853 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7854 assert(!IsPointer && 7855 "Unexpected base element with the pointer type."); 7856 // Mark the whole struct as the struct that requires allocation on the 7857 // device. 7858 PartialStruct.LowestElem = {0, LowestElem}; 7859 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7860 I->getAssociatedExpression()->getType()); 7861 Address HB = CGF.Builder.CreateConstGEP( 7862 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7863 CGF.VoidPtrTy), 7864 TypeSize.getQuantity() - 1); 7865 PartialStruct.HighestElem = { 7866 std::numeric_limits<decltype( 7867 PartialStruct.HighestElem.first)>::max(), 7868 HB}; 7869 PartialStruct.Base = BP; 7870 PartialStruct.LB = LB; 7871 assert( 7872 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7873 "Overlapped elements must be used only once for the variable."); 7874 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7875 // Emit data for non-overlapped data. 7876 OpenMPOffloadMappingFlags Flags = 7877 OMP_MAP_MEMBER_OF | 7878 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7879 /*AddPtrFlag=*/false, 7880 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7881 llvm::Value *Size = nullptr; 7882 // Do bitcopy of all non-overlapped structure elements. 
7883 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7884 Component : OverlappedElements) { 7885 Address ComponentLB = Address::invalid(); 7886 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7887 Component) { 7888 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7889 const auto *FD = dyn_cast<FieldDecl>(VD); 7890 if (FD && FD->getType()->isLValueReferenceType()) { 7891 const auto *ME = 7892 cast<MemberExpr>(MC.getAssociatedExpression()); 7893 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7894 ComponentLB = 7895 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7896 .getAddress(CGF); 7897 } else { 7898 ComponentLB = 7899 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7900 .getAddress(CGF); 7901 } 7902 Size = CGF.Builder.CreatePtrDiff( 7903 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7904 CGF.EmitCastToVoidPtr(LB.getPointer())); 7905 break; 7906 } 7907 } 7908 assert(Size && "Failed to determine structure size"); 7909 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7910 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7911 CombinedInfo.Pointers.push_back(LB.getPointer()); 7912 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7913 Size, CGF.Int64Ty, /*isSigned=*/true)); 7914 CombinedInfo.Types.push_back(Flags); 7915 CombinedInfo.Mappers.push_back(nullptr); 7916 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7917 : 1); 7918 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7919 } 7920 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7921 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7922 CombinedInfo.Pointers.push_back(LB.getPointer()); 7923 Size = CGF.Builder.CreatePtrDiff( 7924 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7925 CGF.EmitCastToVoidPtr(LB.getPointer())); 7926 CombinedInfo.Sizes.push_back( 7927 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7928 CombinedInfo.Types.push_back(Flags); 7929 CombinedInfo.Mappers.push_back(nullptr); 7930 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7931 : 1); 7932 break; 7933 } 7934 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7935 if (!IsMemberPointerOrAddr || 7936 (Next == CE && MapType != OMPC_MAP_unknown)) { 7937 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7938 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7939 CombinedInfo.Pointers.push_back(LB.getPointer()); 7940 CombinedInfo.Sizes.push_back( 7941 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7942 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7943 : 1); 7944 7945 // If Mapper is valid, the last component inherits the mapper. 7946 bool HasMapper = Mapper && Next == CE; 7947 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7948 7949 // We need to add a pointer flag for each map that comes from the 7950 // same expression except for the first one. We also need to signal 7951 // this map is the first one that relates with the current capture 7952 // (there is a set of entries for each capture). 
7953 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7954 MapType, MapModifiers, MotionModifiers, IsImplicit, 7955 !IsExpressionFirstInfo || RequiresReference || 7956 FirstPointerInComplexData || IsMemberReference, 7957 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7958 7959 if (!IsExpressionFirstInfo || IsMemberReference) { 7960 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7961 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7962 if (IsPointer || (IsMemberReference && Next != CE)) 7963 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7964 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7965 7966 if (ShouldBeMemberOf) { 7967 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7968 // should be later updated with the correct value of MEMBER_OF. 7969 Flags |= OMP_MAP_MEMBER_OF; 7970 // From now on, all subsequent PTR_AND_OBJ entries should not be 7971 // marked as MEMBER_OF. 7972 ShouldBeMemberOf = false; 7973 } 7974 } 7975 7976 CombinedInfo.Types.push_back(Flags); 7977 } 7978 7979 // If we have encountered a member expression so far, keep track of the 7980 // mapped member. If the parent is "*this", then the value declaration 7981 // is nullptr. 
7982 if (EncounteredME) { 7983 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7984 unsigned FieldIndex = FD->getFieldIndex(); 7985 7986 // Update info about the lowest and highest elements for this struct 7987 if (!PartialStruct.Base.isValid()) { 7988 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7989 if (IsFinalArraySection) { 7990 Address HB = 7991 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7992 .getAddress(CGF); 7993 PartialStruct.HighestElem = {FieldIndex, HB}; 7994 } else { 7995 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7996 } 7997 PartialStruct.Base = BP; 7998 PartialStruct.LB = BP; 7999 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8000 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8001 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8002 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8003 } 8004 } 8005 8006 // Need to emit combined struct for array sections. 8007 if (IsFinalArraySection || IsNonContiguous) 8008 PartialStruct.IsArraySection = true; 8009 8010 // If we have a final array section, we are done with this expression. 8011 if (IsFinalArraySection) 8012 break; 8013 8014 // The pointer becomes the base for the next element. 8015 if (Next != CE) 8016 BP = IsMemberReference ? LowestElem : LB; 8017 8018 IsExpressionFirstInfo = false; 8019 IsCaptureFirstInfo = false; 8020 FirstPointerInComplexData = false; 8021 IsPrevMemberReference = IsMemberReference; 8022 } else if (FirstPointerInComplexData) { 8023 QualType Ty = Components.rbegin() 8024 ->getAssociatedDeclaration() 8025 ->getType() 8026 .getNonReferenceType(); 8027 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8028 FirstPointerInComplexData = false; 8029 } 8030 } 8031 // If ran into the whole component - allocate the space for the whole 8032 // record. 
8033 if (!EncounteredME) 8034 PartialStruct.HasCompleteRecord = true; 8035 8036 if (!IsNonContiguous) 8037 return; 8038 8039 const ASTContext &Context = CGF.getContext(); 8040 8041 // For supporting stride in array section, we need to initialize the first 8042 // dimension size as 1, first offset as 0, and first count as 1 8043 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8044 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8045 MapValuesArrayTy CurStrides; 8046 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8047 uint64_t ElementTypeSize; 8048 8049 // Collect Size information for each dimension and get the element size as 8050 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8051 // should be [10, 10] and the first stride is 4 btyes. 8052 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8053 Components) { 8054 const Expr *AssocExpr = Component.getAssociatedExpression(); 8055 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8056 8057 if (!OASE) 8058 continue; 8059 8060 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8061 auto *CAT = Context.getAsConstantArrayType(Ty); 8062 auto *VAT = Context.getAsVariableArrayType(Ty); 8063 8064 // We need all the dimension size except for the last dimension. 8065 assert((VAT || CAT || &Component == &*Components.begin()) && 8066 "Should be either ConstantArray or VariableArray if not the " 8067 "first Component"); 8068 8069 // Get element size if CurStrides is empty. 
8070 if (CurStrides.empty()) { 8071 const Type *ElementType = nullptr; 8072 if (CAT) 8073 ElementType = CAT->getElementType().getTypePtr(); 8074 else if (VAT) 8075 ElementType = VAT->getElementType().getTypePtr(); 8076 else 8077 assert(&Component == &*Components.begin() && 8078 "Only expect pointer (non CAT or VAT) when this is the " 8079 "first Component"); 8080 // If ElementType is null, then it means the base is a pointer 8081 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8082 // for next iteration. 8083 if (ElementType) { 8084 // For the case that having pointer as base, we need to remove one 8085 // level of indirection. 8086 if (&Component != &*Components.begin()) 8087 ElementType = ElementType->getPointeeOrArrayElementType(); 8088 ElementTypeSize = 8089 Context.getTypeSizeInChars(ElementType).getQuantity(); 8090 CurStrides.push_back( 8091 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8092 } 8093 } 8094 // Get dimension value except for the last dimension since we don't need 8095 // it. 8096 if (DimSizes.size() < Components.size() - 1) { 8097 if (CAT) 8098 DimSizes.push_back(llvm::ConstantInt::get( 8099 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8100 else if (VAT) 8101 DimSizes.push_back(CGF.Builder.CreateIntCast( 8102 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8103 /*IsSigned=*/false)); 8104 } 8105 } 8106 8107 // Skip the dummy dimension since we have already have its information. 8108 auto DI = DimSizes.begin() + 1; 8109 // Product of dimension. 8110 llvm::Value *DimProd = 8111 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8112 8113 // Collect info for non-contiguous. Notice that offset, count, and stride 8114 // are only meaningful for array-section, so we insert a null for anything 8115 // other than array-section. 8116 // Also, the size of offset, count, and stride are not the same as 8117 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8118 // count, and stride are the same as the number of non-contiguous 8119 // declaration in target update to/from clause. 8120 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8121 Components) { 8122 const Expr *AssocExpr = Component.getAssociatedExpression(); 8123 8124 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8125 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8126 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8127 /*isSigned=*/false); 8128 CurOffsets.push_back(Offset); 8129 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8130 CurStrides.push_back(CurStrides.back()); 8131 continue; 8132 } 8133 8134 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8135 8136 if (!OASE) 8137 continue; 8138 8139 // Offset 8140 const Expr *OffsetExpr = OASE->getLowerBound(); 8141 llvm::Value *Offset = nullptr; 8142 if (!OffsetExpr) { 8143 // If offset is absent, then we just set it to zero. 8144 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8145 } else { 8146 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8147 CGF.Int64Ty, 8148 /*isSigned=*/false); 8149 } 8150 CurOffsets.push_back(Offset); 8151 8152 // Count 8153 const Expr *CountExpr = OASE->getLength(); 8154 llvm::Value *Count = nullptr; 8155 if (!CountExpr) { 8156 // In Clang, once a high dimension is an array section, we construct all 8157 // the lower dimension as array section, however, for case like 8158 // arr[0:2][2], Clang construct the inner dimension as an array section 8159 // but it actually is not in an array section form according to spec. 8160 if (!OASE->getColonLocFirst().isValid() && 8161 !OASE->getColonLocSecond().isValid()) { 8162 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8163 } else { 8164 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8165 // When the length is absent it defaults to ⌈(size − 8166 // lower-bound)/stride⌉, where size is the size of the array 8167 // dimension. 8168 const Expr *StrideExpr = OASE->getStride(); 8169 llvm::Value *Stride = 8170 StrideExpr 8171 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8172 CGF.Int64Ty, /*isSigned=*/false) 8173 : nullptr; 8174 if (Stride) 8175 Count = CGF.Builder.CreateUDiv( 8176 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8177 else 8178 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8179 } 8180 } else { 8181 Count = CGF.EmitScalarExpr(CountExpr); 8182 } 8183 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8184 CurCounts.push_back(Count); 8185 8186 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8187 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8188 // Offset Count Stride 8189 // D0 0 1 4 (int) <- dummy dimension 8190 // D1 0 2 8 (2 * (1) * 4) 8191 // D2 1 2 20 (1 * (1 * 5) * 4) 8192 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8193 const Expr *StrideExpr = OASE->getStride(); 8194 llvm::Value *Stride = 8195 StrideExpr 8196 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8197 CGF.Int64Ty, /*isSigned=*/false) 8198 : nullptr; 8199 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8200 if (Stride) 8201 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8202 else 8203 CurStrides.push_back(DimProd); 8204 if (DI != DimSizes.end()) 8205 ++DI; 8206 } 8207 8208 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8209 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8210 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8211 } 8212 8213 /// Return the adjusted map modifiers if the declaration a capture refers to 8214 /// appears in a first-private clause. This is expected to be used only with 8215 /// directives that start with 'target'. 
8216 MappableExprsHandler::OpenMPOffloadMappingFlags 8217 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8218 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8219 8220 // A first private variable captured by reference will use only the 8221 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8222 // declaration is known as first-private in this handler. 8223 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8224 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8225 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8226 return MappableExprsHandler::OMP_MAP_ALWAYS | 8227 MappableExprsHandler::OMP_MAP_TO; 8228 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8229 return MappableExprsHandler::OMP_MAP_TO | 8230 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8231 return MappableExprsHandler::OMP_MAP_PRIVATE | 8232 MappableExprsHandler::OMP_MAP_TO; 8233 } 8234 return MappableExprsHandler::OMP_MAP_TO | 8235 MappableExprsHandler::OMP_MAP_FROM; 8236 } 8237 8238 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8239 // Rotate by getFlagMemberOffset() bits. 8240 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8241 << getFlagMemberOffset()); 8242 } 8243 8244 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8245 OpenMPOffloadMappingFlags MemberOfFlag) { 8246 // If the entry is PTR_AND_OBJ but has not been marked with the special 8247 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8248 // marked as MEMBER_OF. 8249 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8250 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8251 return; 8252 8253 // Reset the placeholder value to prepare the flag for the assignment of the 8254 // proper MEMBER_OF value. 
8255 Flags &= ~OMP_MAP_MEMBER_OF; 8256 Flags |= MemberOfFlag; 8257 } 8258 8259 void getPlainLayout(const CXXRecordDecl *RD, 8260 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8261 bool AsBase) const { 8262 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8263 8264 llvm::StructType *St = 8265 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8266 8267 unsigned NumElements = St->getNumElements(); 8268 llvm::SmallVector< 8269 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8270 RecordLayout(NumElements); 8271 8272 // Fill bases. 8273 for (const auto &I : RD->bases()) { 8274 if (I.isVirtual()) 8275 continue; 8276 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8277 // Ignore empty bases. 8278 if (Base->isEmpty() || CGF.getContext() 8279 .getASTRecordLayout(Base) 8280 .getNonVirtualSize() 8281 .isZero()) 8282 continue; 8283 8284 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8285 RecordLayout[FieldIndex] = Base; 8286 } 8287 // Fill in virtual bases. 8288 for (const auto &I : RD->vbases()) { 8289 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8290 // Ignore empty bases. 8291 if (Base->isEmpty()) 8292 continue; 8293 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8294 if (RecordLayout[FieldIndex]) 8295 continue; 8296 RecordLayout[FieldIndex] = Base; 8297 } 8298 // Fill in all the fields. 8299 assert(!RD->isUnion() && "Unexpected union."); 8300 for (const auto *Field : RD->fields()) { 8301 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8302 // will fill in later.) 
8303 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8304 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8305 RecordLayout[FieldIndex] = Field; 8306 } 8307 } 8308 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8309 &Data : RecordLayout) { 8310 if (Data.isNull()) 8311 continue; 8312 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8313 getPlainLayout(Base, Layout, /*AsBase=*/true); 8314 else 8315 Layout.push_back(Data.get<const FieldDecl *>()); 8316 } 8317 } 8318 8319 /// Generate all the base pointers, section pointers, sizes, map types, and 8320 /// mappers for the extracted mappable expressions (all included in \a 8321 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8322 /// pair of the relevant declaration and index where it occurs is appended to 8323 /// the device pointers info array. 8324 void generateAllInfoForClauses( 8325 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8326 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8327 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8328 // We have to process the component lists that relate with the same 8329 // declaration in a single chunk so that we can generate the map flags 8330 // correctly. Therefore, we organize all lists in a map. 8331 enum MapKind { Present, Allocs, Other, Total }; 8332 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8333 SmallVector<SmallVector<MapInfo, 8>, 4>> 8334 Info; 8335 8336 // Helper function to fill the information map for the different supported 8337 // clauses. 
8338 auto &&InfoGen = 8339 [&Info, &SkipVarSet]( 8340 const ValueDecl *D, MapKind Kind, 8341 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8342 OpenMPMapClauseKind MapType, 8343 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8344 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 8345 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 8346 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 8347 if (SkipVarSet.contains(D)) 8348 return; 8349 auto It = Info.find(D); 8350 if (It == Info.end()) 8351 It = Info 8352 .insert(std::make_pair( 8353 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) 8354 .first; 8355 It->second[Kind].emplace_back( 8356 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 8357 IsImplicit, Mapper, VarRef, ForDeviceAddr); 8358 }; 8359 8360 for (const auto *Cl : Clauses) { 8361 const auto *C = dyn_cast<OMPMapClause>(Cl); 8362 if (!C) 8363 continue; 8364 MapKind Kind = Other; 8365 if (!C->getMapTypeModifiers().empty() && 8366 llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) { 8367 return K == OMPC_MAP_MODIFIER_present; 8368 })) 8369 Kind = Present; 8370 else if (C->getMapType() == OMPC_MAP_alloc) 8371 Kind = Allocs; 8372 const auto *EI = C->getVarRefs().begin(); 8373 for (const auto L : C->component_lists()) { 8374 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8375 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), 8376 C->getMapTypeModifiers(), llvm::None, 8377 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), 8378 E); 8379 ++EI; 8380 } 8381 } 8382 for (const auto *Cl : Clauses) { 8383 const auto *C = dyn_cast<OMPToClause>(Cl); 8384 if (!C) 8385 continue; 8386 MapKind Kind = Other; 8387 if (!C->getMotionModifiers().empty() && 8388 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8389 return K == OMPC_MOTION_MODIFIER_present; 8390 })) 8391 Kind = Present; 8392 const auto *EI = C->getVarRefs().begin(); 8393 for (const auto L : C->component_lists()) { 8394 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, 8395 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8396 C->isImplicit(), std::get<2>(L), *EI); 8397 ++EI; 8398 } 8399 } 8400 for (const auto *Cl : Clauses) { 8401 const auto *C = dyn_cast<OMPFromClause>(Cl); 8402 if (!C) 8403 continue; 8404 MapKind Kind = Other; 8405 if (!C->getMotionModifiers().empty() && 8406 llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) { 8407 return K == OMPC_MOTION_MODIFIER_present; 8408 })) 8409 Kind = Present; 8410 const auto *EI = C->getVarRefs().begin(); 8411 for (const auto L : C->component_lists()) { 8412 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, 8413 C->getMotionModifiers(), /*ReturnDevicePointer=*/false, 8414 C->isImplicit(), std::get<2>(L), *EI); 8415 ++EI; 8416 } 8417 } 8418 8419 // Look at the use_device_ptr clause information and mark the existing map 8420 // entries as such. If there is no map information for an entry in the 8421 // use_device_ptr list, we create one with map type 'alloc' and zero size 8422 // section. It is the user fault if that was not mapped before. If there is 8423 // no map information and the pointer is a struct member, then we defer the 8424 // emission of that entry until the whole struct has been processed. 
8425 llvm::MapVector<CanonicalDeclPtr<const Decl>, 8426 SmallVector<DeferredDevicePtrEntryTy, 4>> 8427 DeferredInfo; 8428 MapCombinedInfoTy UseDevicePtrCombinedInfo; 8429 8430 for (const auto *Cl : Clauses) { 8431 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 8432 if (!C) 8433 continue; 8434 for (const auto L : C->component_lists()) { 8435 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8436 std::get<1>(L); 8437 assert(!Components.empty() && 8438 "Not expecting empty list of components!"); 8439 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 8440 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8441 const Expr *IE = Components.back().getAssociatedExpression(); 8442 // If the first component is a member expression, we have to look into 8443 // 'this', which maps to null in the map of map information. Otherwise 8444 // look directly for the information. 8445 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8446 8447 // We potentially have map information for this declaration already. 8448 // Look for the first set of components that refer to it. 8449 if (It != Info.end()) { 8450 bool Found = false; 8451 for (auto &Data : It->second) { 8452 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8453 return MI.Components.back().getAssociatedDeclaration() == VD; 8454 }); 8455 // If we found a map entry, signal that the pointer has to be 8456 // returned and move on to the next declaration. Exclude cases where 8457 // the base pointer is mapped as array subscript, array section or 8458 // array shaping. The base address is passed as a pointer to base in 8459 // this case and cannot be used as a base for use_device_ptr list 8460 // item. 
8461 if (CI != Data.end()) { 8462 auto PrevCI = std::next(CI->Components.rbegin()); 8463 const auto *VarD = dyn_cast<VarDecl>(VD); 8464 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8465 isa<MemberExpr>(IE) || 8466 !VD->getType().getNonReferenceType()->isPointerType() || 8467 PrevCI == CI->Components.rend() || 8468 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || 8469 VarD->hasLocalStorage()) { 8470 CI->ReturnDevicePointer = true; 8471 Found = true; 8472 break; 8473 } 8474 } 8475 } 8476 if (Found) 8477 continue; 8478 } 8479 8480 // We didn't find any match in our map information - generate a zero 8481 // size array section - if the pointer is a struct member we defer this 8482 // action until the whole struct has been processed. 8483 if (isa<MemberExpr>(IE)) { 8484 // Insert the pointer into Info to be processed by 8485 // generateInfoForComponentList. Because it is a member pointer 8486 // without a pointee, no entry will be generated for it, therefore 8487 // we need to generate one after the whole struct has been processed. 8488 // Nonetheless, generateInfoForComponentList must be called to take 8489 // the pointer into account for the calculation of the range of the 8490 // partial struct. 
8491 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, 8492 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8493 nullptr); 8494 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); 8495 } else { 8496 llvm::Value *Ptr = 8497 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8498 UseDevicePtrCombinedInfo.Exprs.push_back(VD); 8499 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); 8500 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); 8501 UseDevicePtrCombinedInfo.Sizes.push_back( 8502 llvm::Constant::getNullValue(CGF.Int64Ty)); 8503 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8504 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); 8505 } 8506 } 8507 } 8508 8509 // Look at the use_device_addr clause information and mark the existing map 8510 // entries as such. If there is no map information for an entry in the 8511 // use_device_addr list, we create one with map type 'alloc' and zero size 8512 // section. It is the user fault if that was not mapped before. If there is 8513 // no map information and the pointer is a struct member, then we defer the 8514 // emission of that entry until the whole struct has been processed. 8515 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8516 for (const auto *Cl : Clauses) { 8517 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8518 if (!C) 8519 continue; 8520 for (const auto L : C->component_lists()) { 8521 assert(!std::get<1>(L).empty() && 8522 "Not expecting empty list of components!"); 8523 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8524 if (!Processed.insert(VD).second) 8525 continue; 8526 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8527 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8528 // If the first component is a member expression, we have to look into 8529 // 'this', which maps to null in the map of map information. 
Otherwise 8530 // look directly for the information. 8531 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8532 8533 // We potentially have map information for this declaration already. 8534 // Look for the first set of components that refer to it. 8535 if (It != Info.end()) { 8536 bool Found = false; 8537 for (auto &Data : It->second) { 8538 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { 8539 return MI.Components.back().getAssociatedDeclaration() == VD; 8540 }); 8541 // If we found a map entry, signal that the pointer has to be 8542 // returned and move on to the next declaration. 8543 if (CI != Data.end()) { 8544 CI->ReturnDevicePointer = true; 8545 Found = true; 8546 break; 8547 } 8548 } 8549 if (Found) 8550 continue; 8551 } 8552 8553 // We didn't find any match in our map information - generate a zero 8554 // size array section - if the pointer is a struct member we defer this 8555 // action until the whole struct has been processed. 8556 if (isa<MemberExpr>(IE)) { 8557 // Insert the pointer into Info to be processed by 8558 // generateInfoForComponentList. Because it is a member pointer 8559 // without a pointee, no entry will be generated for it, therefore 8560 // we need to generate one after the whole struct has been processed. 8561 // Nonetheless, generateInfoForComponentList must be called to take 8562 // the pointer into account for the calculation of the range of the 8563 // partial struct. 
8564 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None, 8565 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), 8566 nullptr, nullptr, /*ForDeviceAddr=*/true); 8567 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8568 } else { 8569 llvm::Value *Ptr; 8570 if (IE->isGLValue()) 8571 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8572 else 8573 Ptr = CGF.EmitScalarExpr(IE); 8574 CombinedInfo.Exprs.push_back(VD); 8575 CombinedInfo.BasePointers.emplace_back(Ptr, VD); 8576 CombinedInfo.Pointers.push_back(Ptr); 8577 CombinedInfo.Sizes.push_back( 8578 llvm::Constant::getNullValue(CGF.Int64Ty)); 8579 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); 8580 CombinedInfo.Mappers.push_back(nullptr); 8581 } 8582 } 8583 } 8584 8585 for (const auto &Data : Info) { 8586 StructRangeInfoTy PartialStruct; 8587 // Temporary generated information. 8588 MapCombinedInfoTy CurInfo; 8589 const Decl *D = Data.first; 8590 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8591 for (const auto &M : Data.second) { 8592 for (const MapInfo &L : M) { 8593 assert(!L.Components.empty() && 8594 "Not expecting declaration with no component lists."); 8595 8596 // Remember the current base pointer index. 8597 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8598 CurInfo.NonContigInfo.IsNonContiguous = 8599 L.Components.back().isNonContiguous(); 8600 generateInfoForComponentList( 8601 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8602 CurInfo, PartialStruct, /*IsFirstComponentList=*/false, 8603 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef); 8604 8605 // If this entry relates with a device pointer, set the relevant 8606 // declaration and add the 'return pointer' flag. 
8607 if (L.ReturnDevicePointer) { 8608 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && 8609 "Unexpected number of mapped base pointers."); 8610 8611 const ValueDecl *RelevantVD = 8612 L.Components.back().getAssociatedDeclaration(); 8613 assert(RelevantVD && 8614 "No relevant declaration related with device pointer??"); 8615 8616 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( 8617 RelevantVD); 8618 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8619 } 8620 } 8621 } 8622 8623 // Append any pending zero-length pointers which are struct members and 8624 // used with use_device_ptr or use_device_addr. 8625 auto CI = DeferredInfo.find(Data.first); 8626 if (CI != DeferredInfo.end()) { 8627 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8628 llvm::Value *BasePtr; 8629 llvm::Value *Ptr; 8630 if (L.ForDeviceAddr) { 8631 if (L.IE->isGLValue()) 8632 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8633 else 8634 Ptr = this->CGF.EmitScalarExpr(L.IE); 8635 BasePtr = Ptr; 8636 // Entry is RETURN_PARAM. Also, set the placeholder value 8637 // MEMBER_OF=FFFF so that the entry is later updated with the 8638 // correct value of MEMBER_OF. 8639 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8640 } else { 8641 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8642 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8643 L.IE->getExprLoc()); 8644 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8645 // placeholder value MEMBER_OF=FFFF so that the entry is later 8646 // updated with the correct value of MEMBER_OF. 
8647 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8648 OMP_MAP_MEMBER_OF); 8649 } 8650 CurInfo.Exprs.push_back(L.VD); 8651 CurInfo.BasePointers.emplace_back(BasePtr, L.VD); 8652 CurInfo.Pointers.push_back(Ptr); 8653 CurInfo.Sizes.push_back( 8654 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8655 CurInfo.Mappers.push_back(nullptr); 8656 } 8657 } 8658 // If there is an entry in PartialStruct it means we have a struct with 8659 // individual members mapped. Emit an extra combined entry. 8660 if (PartialStruct.Base.isValid()) { 8661 CurInfo.NonContigInfo.Dims.push_back(0); 8662 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); 8663 } 8664 8665 // We need to append the results of this capture to what we already 8666 // have. 8667 CombinedInfo.append(CurInfo); 8668 } 8669 // Append data for use_device_ptr clauses. 8670 CombinedInfo.append(UseDevicePtrCombinedInfo); 8671 } 8672 8673 public: 8674 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8675 : CurDir(&Dir), CGF(CGF) { 8676 // Extract firstprivate clause information. 8677 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8678 for (const auto *D : C->varlists()) 8679 FirstPrivateDecls.try_emplace( 8680 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8681 // Extract implicit firstprivates from uses_allocators clauses. 
8682 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8683 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8684 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8685 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8686 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8687 /*Implicit=*/true); 8688 else if (const auto *VD = dyn_cast<VarDecl>( 8689 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8690 ->getDecl())) 8691 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8692 } 8693 } 8694 // Extract device pointer clause information. 8695 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8696 for (auto L : C->component_lists()) 8697 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8698 } 8699 8700 /// Constructor for the declare mapper directive. 8701 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8702 : CurDir(&Dir), CGF(CGF) {} 8703 8704 /// Generate code for the combined entry if we have a partially mapped struct 8705 /// and take care of the mapping flags of the arguments corresponding to 8706 /// individual struct members. 
8707 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8708 MapFlagsArrayTy &CurTypes, 8709 const StructRangeInfoTy &PartialStruct, 8710 const ValueDecl *VD = nullptr, 8711 bool NotTargetParams = true) const { 8712 if (CurTypes.size() == 1 && 8713 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8714 !PartialStruct.IsArraySection) 8715 return; 8716 Address LBAddr = PartialStruct.LowestElem.second; 8717 Address HBAddr = PartialStruct.HighestElem.second; 8718 if (PartialStruct.HasCompleteRecord) { 8719 LBAddr = PartialStruct.LB; 8720 HBAddr = PartialStruct.LB; 8721 } 8722 CombinedInfo.Exprs.push_back(VD); 8723 // Base is the base of the struct 8724 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8725 // Pointer is the address of the lowest element 8726 llvm::Value *LB = LBAddr.getPointer(); 8727 CombinedInfo.Pointers.push_back(LB); 8728 // There should not be a mapper for a combined entry. 8729 CombinedInfo.Mappers.push_back(nullptr); 8730 // Size is (addr of {highest+1} element) - (addr of lowest element) 8731 llvm::Value *HB = HBAddr.getPointer(); 8732 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8733 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8734 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8735 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8736 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8737 /*isSigned=*/false); 8738 CombinedInfo.Sizes.push_back(Size); 8739 // Map type is always TARGET_PARAM, if generate info for captures. 8740 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8741 : OMP_MAP_TARGET_PARAM); 8742 // If any element has the present modifier, then make sure the runtime 8743 // doesn't attempt to allocate the struct. 
8744 if (CurTypes.end() != 8745 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8746 return Type & OMP_MAP_PRESENT; 8747 })) 8748 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8749 // Remove TARGET_PARAM flag from the first element 8750 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8751 8752 // All other current entries will be MEMBER_OF the combined entry 8753 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8754 // 0xFFFF in the MEMBER_OF field). 8755 OpenMPOffloadMappingFlags MemberOfFlag = 8756 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8757 for (auto &M : CurTypes) 8758 setCorrectMemberOfFlag(M, MemberOfFlag); 8759 } 8760 8761 /// Generate all the base pointers, section pointers, sizes, map types, and 8762 /// mappers for the extracted mappable expressions (all included in \a 8763 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8764 /// pair of the relevant declaration and index where it occurs is appended to 8765 /// the device pointers info array. 8766 void generateAllInfo( 8767 MapCombinedInfoTy &CombinedInfo, 8768 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8769 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8770 assert(CurDir.is<const OMPExecutableDirective *>() && 8771 "Expect a executable directive"); 8772 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8773 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8774 } 8775 8776 /// Generate all the base pointers, section pointers, sizes, map types, and 8777 /// mappers for the extracted map clauses of user-defined mapper (all included 8778 /// in \a CombinedInfo). 
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
}

/// Emit capture info for lambdas for variables captured by reference.
/// By-copy captures of pointer type are handled as well; every other by-copy
/// capture is skipped.
///
/// \param VD Declaration whose (non-reference) type is inspected; nothing is
/// emitted unless it is a lambda class.
/// \param Arg Value used as the address of the lambda object.
/// \param CombinedInfo Receives one entry per handled capture.
/// \param LambdaPointers Receives, per emitted entry, a mapping from the
/// capture field address to the lambda object address; it is consumed by
/// adjustMemberOfForLambdaCaptures.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Captured 'this': base is the address of the capture field, pointer is the
  // lvalue of the field, size is the size of a void pointer.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD shadows the function parameter VD for the rest of the
    // loop body; Exprs below records the captured variable.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captured pointers produce an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // By-copy pointer capture: the pointer value loaded from the field is
      // recorded with a zero size.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambdas captures.
/// Entries are recognised by having exactly the flag combination that
/// generateInfoForLambdaCaptures emits; for each of them the parent is the
/// closest preceding entry whose pointer equals the lambda address recorded
/// in \a LambdaPointers.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    int TgtIdx = -1;
    // Scan backwards from I - 1 down to 0 for the entry of the parent lambda.
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
///
/// \param Cap Capture to process; must not capture a variable array type.
/// \param Arg Value passed for the capture; used directly when the captured
/// declaration appears in an is_device_ptr clause.
/// \param CombinedInfo Receives the generated entries.
/// \param PartialStruct Forwarded to generateInfoForComponentList.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we are generating information for the first
  // component. VD is null when 'this' is captured.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in an is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.emplace_back(Arg, VD);
    CombinedInfo.Pointers.push_back(Arg);
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
        /*isSigned=*/true));
    CombinedInfo.Types.push_back(
        (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
        OMP_MAP_TARGET_PARAM);
    CombinedInfo.Mappers.push_back(nullptr);
    return;
  }

  // Tuple layout: <components, map type, map modifiers, is-implicit, mapper,
  // variable reference expression>.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                 const ValueDecl *, const Expr *>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Gather every component list of the directive's map clauses that refers to
  // VD. EI advances once per gathered list.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->decl_component_lists(VD)) {
      const ValueDecl *VDecl, *Mapper;
      // The Expression is not correct if the mapping is implicit
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      std::tie(VDecl, Components, Mapper) = L;
      assert(VDecl == VD && "We got information for the wrong declaration??");
      assert(!Components.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(Components, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit(), Mapper, E);
      ++EI;
    }
  }
  // Order the lists: LHS goes before RHS when LHS has the 'present' modifier
  // and RHS does not, or when RHS is an 'alloc' map and LHS is not.
  // NOTE(review): HasAllocs is computed from RHS and HasAllocsR from LHS, i.e.
  // the operands are swapped relative to the HasPresent pair; this looks
  // deliberate (it moves 'alloc' maps towards the end) - confirm before
  // touching.
  llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                           const MapData &RHS) {
    ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
    OpenMPMapClauseKind MapType = std::get<1>(RHS);
    bool HasPresent = !MapModifiers.empty() &&
                      llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                        return K == clang::OMPC_MAP_MODIFIER_present;
                      });
    bool HasAllocs = MapType == OMPC_MAP_alloc;
    MapModifiers = std::get<2>(RHS);
    MapType = std::get<1>(LHS);
    bool HasPresentR =
        !MapModifiers.empty() &&
        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
          return K == clang::OMPC_MAP_MODIFIER_present;
        });
    bool HasAllocsR = MapType == OMPC_MAP_alloc;
    return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
  });

  // Find overlapping elements (including the offset from the base element).
  // Keys point into DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ++Count;
    // Compare L only against the lists that follow it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // This overwrites the remaining locals with L1's values; only
      // Components1 is read below.
      std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
               VarRef) = L1;
      // Walk both lists in reverse while their components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head
      // of the components list.
      if (CI == CE || SI == SE) {
        // Ignore it if it is the same component.
        if (CI == CE && SI == SE)
          continue;
        const auto It = (SI == SE) ? CI : SI;
        // If one component is a pointer and another one is a kind of
        // dereference of this pointer (array subscript, section, dereference,
        // etc.), it is not an overlapping.
        if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
            std::prev(It)
                ->getAssociatedExpression()
                ->getType()
                ->isPointerType())
          continue;
        // The list that was fully consumed is the base; the other list is
        // recorded as overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    // Strip pointer/array levels until the type stops changing.
    const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
    const Type *OrigType = BaseType->getPointeeOrArrayElementType();
    while (BaseType != OrigType) {
      BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
      OrigType = BaseType->getPointeeOrArrayElementType();
    }

    if (const auto *CRD = BaseType->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = BaseType->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::stable_sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // Both lists diverge at a field: order by field index within the
          // same parent, otherwise by whichever field occurs first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Associated with a capture, because the mapping flags depend on it.
  // Go through all of the elements with the overlapped elements.
  bool IsFirstComponentList = true;
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    generateInfoForComponentList(
        MapType, MapModifiers, llvm::None, Components, CombinedInfo,
        PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
        /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    IsFirstComponentList = false;
  }
  // Go through other elements without overlapped elements.
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    const ValueDecl *Mapper;
    const Expr *VarRef;
    std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
        L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                   Components, CombinedInfo, PartialStruct,
                                   IsFirstComponentList, IsImplicit, Mapper,
                                   /*ForDeviceAddr=*/false, VD, VarRef);
    // Cleared on every iteration, including for lists that were skipped
    // because they were already handled as a base of overlapped elements.
    IsFirstComponentList = false;
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
/// Exactly one entry is appended to \a CombinedInfo; it is always marked
/// TARGET_PARAM, is marked IMPLICIT unless FirstPrivateDecls says otherwise,
/// and never has a mapper.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapCombinedInfoTy &CombinedInfo) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // Captured 'this': map the pointee object, sized by the pointee type.
    CombinedInfo.Exprs.push_back(nullptr);
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    const VarDecl *VD = CI.getCapturedVar();
    CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
    CombinedInfo.BasePointers.push_back(CV);
    CombinedInfo.Pointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CombinedInfo.Types.push_back(OMP_MAP_NONE);
      CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // The value stored in FirstPrivateDecls decides whether the map is
    // implicit.
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end() &&
        VD->getType().isConstant(CGF.getContext())) {
      // Constant firstprivate: copy the value into the global registered by
      // registerTargetFirstprivateCopy and map that global instead.
      llvm::Constant *Addr =
          CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
      // Copy the value of the original variable to the new global copy.
      CGF.Builder.CreateMemCpy(
          CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
          Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
          CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
      // Use new global variable as the base pointers.
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Addr);
      CombinedInfo.Pointers.push_back(Addr);
    } else {
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: the pointer entry is
        // the address obtained by loading through the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

  // No user-defined mapper for default mapping.
  CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace

/// Emit, for every non-contiguous entry, a stack array of descriptor_dim
/// records filled from \a CombinedInfo.NonContigInfo and store its address
/// into the matching slot of \a Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field positions inside descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of base declaration that is non-contiguous.
  // I indexes Dims and the pointers array; L indexes Offsets/Counts/Strides
  // and only advances for entries that were not skipped.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Array element II is filled from the collected values in reverse
      // order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
/// Without a mapped declaration the default source-location string is
/// returned. Otherwise the string is built from the presumed file, line and
/// column of the declaration and from the pretty-printed map expression, or
/// the declaration name when there is no expression.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // NOTE(review): PLoc is used without a validity check - confirm that the
    // declaration location is always valid here.
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }
  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers and mappers are stack temporaries that are
    // filled by the per-entry loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For a non-contiguous entry the size slot holds its dimension count
        // instead of a size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One source-location string constant per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; MapTypesArray was already built from
      // the unmodified values above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store every entry into the base-pointer, pointer, (runtime) size and
    // mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to a pointer to the value's own type so the value can
      // be stored as is.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot of each base pointer that carries a device pointer
      // declaration.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are stored at run time only when the sizes array is a stack
      // temporary rather than a constant global.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      // Entries without a user-defined mapper get a null pointer.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Descriptors are only needed when non-contiguous info was requested and
  // collected, and there is at least one entry.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // Selects the map types for the end-of-region runtime call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
9509 static void emitOffloadingArraysArgument( 9510 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9511 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9512 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9513 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9514 const ArgumentsOptions &Options = ArgumentsOptions()) { 9515 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9516 "expected region end call to runtime only when end call is separate"); 9517 CodeGenModule &CGM = CGF.CGM; 9518 if (Info.NumberOfPtrs) { 9519 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9520 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9521 Info.BasePointersArray, 9522 /*Idx0=*/0, /*Idx1=*/0); 9523 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9524 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9525 Info.PointersArray, 9526 /*Idx0=*/0, 9527 /*Idx1=*/0); 9528 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9529 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9530 /*Idx0=*/0, /*Idx1=*/0); 9531 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9532 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9533 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9534 : Info.MapTypesArray, 9535 /*Idx0=*/0, 9536 /*Idx1=*/0); 9537 9538 // Only emit the mapper information arrays if debug information is 9539 // requested. 
9540 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9541 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9542 else 9543 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9544 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9545 Info.MapNamesArray, 9546 /*Idx0=*/0, 9547 /*Idx1=*/0); 9548 // If there is no user-defined mapper, set the mapper array to nullptr to 9549 // avoid an unnecessary data privatization 9550 if (!Info.HasMapper) 9551 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9552 else 9553 MappersArrayArg = 9554 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9555 } else { 9556 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9557 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9558 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9559 MapTypesArrayArg = 9560 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9561 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9562 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9563 } 9564 } 9565 9566 /// Check for inner distribute directive. 
9567 static const OMPExecutableDirective * 9568 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9569 const auto *CS = D.getInnermostCapturedStmt(); 9570 const auto *Body = 9571 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9572 const Stmt *ChildStmt = 9573 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9574 9575 if (const auto *NestedDir = 9576 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9577 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9578 switch (D.getDirectiveKind()) { 9579 case OMPD_target: 9580 if (isOpenMPDistributeDirective(DKind)) 9581 return NestedDir; 9582 if (DKind == OMPD_teams) { 9583 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9584 /*IgnoreCaptured=*/true); 9585 if (!Body) 9586 return nullptr; 9587 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9588 if (const auto *NND = 9589 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9590 DKind = NND->getDirectiveKind(); 9591 if (isOpenMPDistributeDirective(DKind)) 9592 return NND; 9593 } 9594 } 9595 return nullptr; 9596 case OMPD_target_teams: 9597 if (isOpenMPDistributeDirective(DKind)) 9598 return NestedDir; 9599 return nullptr; 9600 case OMPD_target_parallel: 9601 case OMPD_target_simd: 9602 case OMPD_target_parallel_for: 9603 case OMPD_target_parallel_for_simd: 9604 return nullptr; 9605 case OMPD_target_teams_distribute: 9606 case OMPD_target_teams_distribute_simd: 9607 case OMPD_target_teams_distribute_parallel_for: 9608 case OMPD_target_teams_distribute_parallel_for_simd: 9609 case OMPD_parallel: 9610 case OMPD_for: 9611 case OMPD_parallel_for: 9612 case OMPD_parallel_master: 9613 case OMPD_parallel_sections: 9614 case OMPD_for_simd: 9615 case OMPD_parallel_for_simd: 9616 case OMPD_cancel: 9617 case OMPD_cancellation_point: 9618 case OMPD_ordered: 9619 case OMPD_threadprivate: 9620 case OMPD_allocate: 9621 case OMPD_task: 9622 case OMPD_simd: 9623 case OMPD_tile: 9624 
case OMPD_sections: 9625 case OMPD_section: 9626 case OMPD_single: 9627 case OMPD_master: 9628 case OMPD_critical: 9629 case OMPD_taskyield: 9630 case OMPD_barrier: 9631 case OMPD_taskwait: 9632 case OMPD_taskgroup: 9633 case OMPD_atomic: 9634 case OMPD_flush: 9635 case OMPD_depobj: 9636 case OMPD_scan: 9637 case OMPD_teams: 9638 case OMPD_target_data: 9639 case OMPD_target_exit_data: 9640 case OMPD_target_enter_data: 9641 case OMPD_distribute: 9642 case OMPD_distribute_simd: 9643 case OMPD_distribute_parallel_for: 9644 case OMPD_distribute_parallel_for_simd: 9645 case OMPD_teams_distribute: 9646 case OMPD_teams_distribute_simd: 9647 case OMPD_teams_distribute_parallel_for: 9648 case OMPD_teams_distribute_parallel_for_simd: 9649 case OMPD_target_update: 9650 case OMPD_declare_simd: 9651 case OMPD_declare_variant: 9652 case OMPD_begin_declare_variant: 9653 case OMPD_end_declare_variant: 9654 case OMPD_declare_target: 9655 case OMPD_end_declare_target: 9656 case OMPD_declare_reduction: 9657 case OMPD_declare_mapper: 9658 case OMPD_taskloop: 9659 case OMPD_taskloop_simd: 9660 case OMPD_master_taskloop: 9661 case OMPD_master_taskloop_simd: 9662 case OMPD_parallel_master_taskloop: 9663 case OMPD_parallel_master_taskloop_simd: 9664 case OMPD_requires: 9665 case OMPD_unknown: 9666 default: 9667 llvm_unreachable("Unexpected directive."); 9668 } 9669 } 9670 9671 return nullptr; 9672 } 9673 9674 /// Emit the user-defined mapper function. The code generation follows the 9675 /// pattern in the example below. 9676 /// \code 9677 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9678 /// void *base, void *begin, 9679 /// int64_t size, int64_t type, 9680 /// void *name = nullptr) { 9681 /// // Allocate space for an array section first or add a base/begin for 9682 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// The generated function is recorded in UDMMap under \p D; if \p D already
/// has an entry there, nothing is emitted. When \p CGF is non-null, \p D is
/// additionally appended to the FunctionUDMMap entry for CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function has already been emitted for this declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The six parameters are, in order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type, and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is the block the init code branches to when its condition is false.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range (begin == end) skips straight to the function exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block holding the emptiness check; it is the first
  // predecessor of the loop PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that branches back to BodyBB. It stays BodyBB
  // unless the component loop below emits further blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count to the bit offset reported by
  // getFlagMemberOffset() so it can be added to each component's map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is null unless debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    // When the bits are neither exactly 'to' nor exactly 'from' here, control
    // goes from ToElseBB directly to EndBB (the tofrom case).
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // The loop back-edge must now originate from the most recent EndBB.
    LastBB = EndBB;
    // Merge the four possible map types; the ToElseBB edge carries the
    // unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this shadows the single-element OffloadingArgs declared before
    // the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Associate the mapper declaration with the function being generated by the
  // caller, if one was supplied.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \a Size is the number of array elements; it is multiplied by
/// \a ElementSize to obtain the byte size passed to the runtime. \a ExitBB is
/// the branch target when the condition does not hold. No terminator is
/// emitted after the runtime call in the body block.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    // Despite its name, BaseIsBegin is true when Base and Begin differ
    // (non-zero pointer difference).
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when the delete bit is clear.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Additionally set the OMP_MAP_IMPLICIT bit on the map type passed down.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded in UDMMap for \p D, emitting it first
/// via emitUserDefinedMapper() when no entry exists yet.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  // Not emitted yet: generate it (without an associated CodeGenFunction) and
  // look the result up again.
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
10040 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10041 TD = getNestedDistributeDirective(CGM.getContext(), D); 10042 if (!TD) 10043 return; 10044 const auto *LD = cast<OMPLoopDirective>(TD); 10045 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10046 PrePostActionTy &) { 10047 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10048 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10049 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10050 CGF.EmitRuntimeCall( 10051 OMPBuilder.getOrCreateRuntimeFunction( 10052 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10053 Args); 10054 } 10055 }; 10056 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10057 } 10058 10059 void CGOpenMPRuntime::emitTargetCall( 10060 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10061 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10062 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10063 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10064 const OMPLoopDirective &D)> 10065 SizeEmitter) { 10066 if (!CGF.HaveInsertPoint()) 10067 return; 10068 10069 assert(OutlinedFn && "Invalid outlined function!"); 10070 10071 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10072 D.hasClausesOfKind<OMPNowaitClause>(); 10073 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10074 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10075 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10076 PrePostActionTy &) { 10077 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10078 }; 10079 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10080 10081 CodeGenFunction::OMPTargetDataInfo InputInfo; 10082 llvm::Value *MapTypesArray = nullptr; 10083 llvm::Value *MapNamesArray = nullptr; 10084 // Fill up the pointer arrays and transfer execution to the device. 
10085 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10086 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10087 &CapturedVars, 10088 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10089 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10090 // Reverse offloading is not supported, so just execute on the host. 10091 if (RequiresOuterTask) { 10092 CapturedVars.clear(); 10093 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10094 } 10095 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10096 return; 10097 } 10098 10099 // On top of the arrays that were filled up, the target offloading call 10100 // takes as arguments the device id as well as the host pointer. The host 10101 // pointer is used by the runtime library to identify the current target 10102 // region, so it only has to be unique and not necessarily point to 10103 // anything. It could be the pointer to the outlined function that 10104 // implements the target region, but we aren't using that so that the 10105 // compiler doesn't need to keep that, and could therefore inline the host 10106 // function if proven worthwhile during optimization. 10107 10108 // From this point on, we need to have an ID of the target region defined. 10109 assert(OutlinedFnID && "Invalid outlined function ID!"); 10110 10111 // Emit device ID if any. 10112 llvm::Value *DeviceID; 10113 if (Device.getPointer()) { 10114 assert((Device.getInt() == OMPC_DEVICE_unknown || 10115 Device.getInt() == OMPC_DEVICE_device_num) && 10116 "Expected device_num modifier."); 10117 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10118 DeviceID = 10119 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10120 } else { 10121 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10122 } 10123 10124 // Emit the number of elements in the offloading arrays. 
10125 llvm::Value *PointerNum = 10126 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10127 10128 // Return value of the runtime offloading call. 10129 llvm::Value *Return; 10130 10131 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10132 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10133 10134 // Source location for the ident struct 10135 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10136 10137 // Emit tripcount for the target loop-based directive. 10138 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10139 10140 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10141 // The target region is an outlined function launched by the runtime 10142 // via calls __tgt_target() or __tgt_target_teams(). 10143 // 10144 // __tgt_target() launches a target region with one team and one thread, 10145 // executing a serial region. This master thread may in turn launch 10146 // more threads within its team upon encountering a parallel region, 10147 // however, no additional teams can be launched on the device. 10148 // 10149 // __tgt_target_teams() launches a target region with one or more teams, 10150 // each with one or more threads. This call is required for target 10151 // constructs such as: 10152 // 'target teams' 10153 // 'target' / 'teams' 10154 // 'target teams distribute parallel for' 10155 // 'target parallel' 10156 // and so on. 10157 // 10158 // Note that on the host and CPU targets, the runtime implementation of 10159 // these calls simply call the outlined function without forking threads. 10160 // The outlined functions themselves have runtime calls to 10161 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10162 // the compiler in emitTeamsCall() and emitParallelCall(). 
10163 // 10164 // In contrast, on the NVPTX target, the implementation of 10165 // __tgt_target_teams() launches a GPU kernel with the requested number 10166 // of teams and threads so no additional calls to the runtime are required. 10167 if (NumTeams) { 10168 // If we have NumTeams defined this means that we have an enclosed teams 10169 // region. Therefore we also expect to have NumThreads defined. These two 10170 // values should be defined in the presence of a teams directive, 10171 // regardless of having any clauses associated. If the user is using teams 10172 // but no clauses, these two values will be the default that should be 10173 // passed to the runtime library - a 32-bit integer with the value zero. 10174 assert(NumThreads && "Thread limit expression should be available along " 10175 "with number of teams."); 10176 llvm::Value *OffloadingArgs[] = {RTLoc, 10177 DeviceID, 10178 OutlinedFnID, 10179 PointerNum, 10180 InputInfo.BasePointersArray.getPointer(), 10181 InputInfo.PointersArray.getPointer(), 10182 InputInfo.SizesArray.getPointer(), 10183 MapTypesArray, 10184 MapNamesArray, 10185 InputInfo.MappersArray.getPointer(), 10186 NumTeams, 10187 NumThreads}; 10188 Return = CGF.EmitRuntimeCall( 10189 OMPBuilder.getOrCreateRuntimeFunction( 10190 CGM.getModule(), HasNowait 10191 ? OMPRTL___tgt_target_teams_nowait_mapper 10192 : OMPRTL___tgt_target_teams_mapper), 10193 OffloadingArgs); 10194 } else { 10195 llvm::Value *OffloadingArgs[] = {RTLoc, 10196 DeviceID, 10197 OutlinedFnID, 10198 PointerNum, 10199 InputInfo.BasePointersArray.getPointer(), 10200 InputInfo.PointersArray.getPointer(), 10201 InputInfo.SizesArray.getPointer(), 10202 MapTypesArray, 10203 MapNamesArray, 10204 InputInfo.MappersArray.getPointer()}; 10205 Return = CGF.EmitRuntimeCall( 10206 OMPBuilder.getOrCreateRuntimeFunction( 10207 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10208 : OMPRTL___tgt_target_mapper), 10209 OffloadingArgs); 10210 } 10211 10212 // Check the error code and execute the host version if required. 10213 llvm::BasicBlock *OffloadFailedBlock = 10214 CGF.createBasicBlock("omp_offload.failed"); 10215 llvm::BasicBlock *OffloadContBlock = 10216 CGF.createBasicBlock("omp_offload.cont"); 10217 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10218 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10219 10220 CGF.EmitBlock(OffloadFailedBlock); 10221 if (RequiresOuterTask) { 10222 CapturedVars.clear(); 10223 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10224 } 10225 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10226 CGF.EmitBranch(OffloadContBlock); 10227 10228 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10229 }; 10230 10231 // Notify that the host version must be executed. 10232 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10233 RequiresOuterTask](CodeGenFunction &CGF, 10234 PrePostActionTy &) { 10235 if (RequiresOuterTask) { 10236 CapturedVars.clear(); 10237 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10238 } 10239 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10240 }; 10241 10242 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10243 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10244 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10245 // Fill up the arrays with all the captured variables. 10246 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10247 10248 // Get mappable expression information. 
10249 MappableExprsHandler MEHandler(D, CGF); 10250 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10251 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10252 10253 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10254 auto *CV = CapturedVars.begin(); 10255 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10256 CE = CS.capture_end(); 10257 CI != CE; ++CI, ++RI, ++CV) { 10258 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10259 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10260 10261 // VLA sizes are passed to the outlined region by copy and do not have map 10262 // information associated. 10263 if (CI->capturesVariableArrayType()) { 10264 CurInfo.Exprs.push_back(nullptr); 10265 CurInfo.BasePointers.push_back(*CV); 10266 CurInfo.Pointers.push_back(*CV); 10267 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10268 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10269 // Copy to the device as an argument. No need to retrieve it. 10270 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10271 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10272 MappableExprsHandler::OMP_MAP_IMPLICIT); 10273 CurInfo.Mappers.push_back(nullptr); 10274 } else { 10275 // If we have any information in the map clause, we use it, otherwise we 10276 // just do a default mapping. 10277 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10278 if (!CI->capturesThis()) 10279 MappedVarSet.insert(CI->getCapturedVar()); 10280 else 10281 MappedVarSet.insert(nullptr); 10282 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10283 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10284 // Generate correct mapping for variables captured by reference in 10285 // lambdas. 
10286 if (CI->capturesVariable()) 10287 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10288 CurInfo, LambdaPointers); 10289 } 10290 // We expect to have at least an element of information for this capture. 10291 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10292 "Non-existing map pointer for capture!"); 10293 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10294 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10295 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10296 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10297 "Inconsistent map information sizes!"); 10298 10299 // If there is an entry in PartialStruct it means we have a struct with 10300 // individual members mapped. Emit an extra combined entry. 10301 if (PartialStruct.Base.isValid()) { 10302 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10303 MEHandler.emitCombinedEntry( 10304 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10305 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10306 } 10307 10308 // We need to append the results of this capture to what we already have. 10309 CombinedInfo.append(CurInfo); 10310 } 10311 // Adjust MEMBER_OF flags for the lambdas captures. 10312 MEHandler.adjustMemberOfForLambdaCaptures( 10313 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10314 CombinedInfo.Types); 10315 // Map any list items in a map clause that were not captures because they 10316 // weren't referenced within the construct. 10317 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10318 10319 TargetDataInfo Info; 10320 // Fill up the arrays and create the arguments. 
10321 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10322 emitOffloadingArraysArgument( 10323 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10324 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10325 {/*ForEndTask=*/false}); 10326 10327 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10328 InputInfo.BasePointersArray = 10329 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10330 InputInfo.PointersArray = 10331 Address(Info.PointersArray, CGM.getPointerAlign()); 10332 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10333 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10334 MapTypesArray = Info.MapTypesArray; 10335 MapNamesArray = Info.MapNamesArray; 10336 if (RequiresOuterTask) 10337 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10338 else 10339 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10340 }; 10341 10342 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10343 CodeGenFunction &CGF, PrePostActionTy &) { 10344 if (RequiresOuterTask) { 10345 CodeGenFunction::OMPTargetDataInfo InputInfo; 10346 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10347 } else { 10348 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10349 } 10350 }; 10351 10352 // If we have a target function ID it means that we need to support 10353 // offloading, otherwise, just execute on the host. We need to execute on host 10354 // regardless of the conditional in the if clause if, e.g., the user do not 10355 // specify target triples. 
10356 if (OutlinedFnID) { 10357 if (IfCond) { 10358 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10359 } else { 10360 RegionCodeGenTy ThenRCG(TargetThenGen); 10361 ThenRCG(CGF); 10362 } 10363 } else { 10364 RegionCodeGenTy ElseRCG(TargetElseGen); 10365 ElseRCG(CGF); 10366 } 10367 } 10368 10369 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10370 StringRef ParentName) { 10371 if (!S) 10372 return; 10373 10374 // Codegen OMP target directives that offload compute to the device. 10375 bool RequiresDeviceCodegen = 10376 isa<OMPExecutableDirective>(S) && 10377 isOpenMPTargetExecutionDirective( 10378 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10379 10380 if (RequiresDeviceCodegen) { 10381 const auto &E = *cast<OMPExecutableDirective>(S); 10382 unsigned DeviceID; 10383 unsigned FileID; 10384 unsigned Line; 10385 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10386 FileID, Line); 10387 10388 // Is this a target region that should not be emitted as an entry point? If 10389 // so just signal we are done with this target region. 
10390 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10391 ParentName, Line)) 10392 return; 10393 10394 switch (E.getDirectiveKind()) { 10395 case OMPD_target: 10396 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10397 cast<OMPTargetDirective>(E)); 10398 break; 10399 case OMPD_target_parallel: 10400 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10401 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10402 break; 10403 case OMPD_target_teams: 10404 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10405 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10406 break; 10407 case OMPD_target_teams_distribute: 10408 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10409 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10410 break; 10411 case OMPD_target_teams_distribute_simd: 10412 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10413 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10414 break; 10415 case OMPD_target_parallel_for: 10416 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10417 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10418 break; 10419 case OMPD_target_parallel_for_simd: 10420 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10421 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10422 break; 10423 case OMPD_target_simd: 10424 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10425 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10426 break; 10427 case OMPD_target_teams_distribute_parallel_for: 10428 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10429 CGM, ParentName, 10430 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10431 break; 10432 case OMPD_target_teams_distribute_parallel_for_simd: 10433 CodeGenFunction:: 10434 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10435 CGM, ParentName, 10436 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10437 break; 10438 case OMPD_parallel: 10439 case OMPD_for: 10440 case OMPD_parallel_for: 10441 case OMPD_parallel_master: 10442 case OMPD_parallel_sections: 10443 case OMPD_for_simd: 10444 case OMPD_parallel_for_simd: 10445 case OMPD_cancel: 10446 case OMPD_cancellation_point: 10447 case OMPD_ordered: 10448 case OMPD_threadprivate: 10449 case OMPD_allocate: 10450 case OMPD_task: 10451 case OMPD_simd: 10452 case OMPD_tile: 10453 case OMPD_sections: 10454 case OMPD_section: 10455 case OMPD_single: 10456 case OMPD_master: 10457 case OMPD_critical: 10458 case OMPD_taskyield: 10459 case OMPD_barrier: 10460 case OMPD_taskwait: 10461 case OMPD_taskgroup: 10462 case OMPD_atomic: 10463 case OMPD_flush: 10464 case OMPD_depobj: 10465 case OMPD_scan: 10466 case OMPD_teams: 10467 case OMPD_target_data: 10468 case OMPD_target_exit_data: 10469 case OMPD_target_enter_data: 10470 case OMPD_distribute: 10471 case OMPD_distribute_simd: 10472 case OMPD_distribute_parallel_for: 10473 case OMPD_distribute_parallel_for_simd: 10474 case OMPD_teams_distribute: 10475 case OMPD_teams_distribute_simd: 10476 case OMPD_teams_distribute_parallel_for: 10477 case OMPD_teams_distribute_parallel_for_simd: 10478 case OMPD_target_update: 10479 case OMPD_declare_simd: 10480 case OMPD_declare_variant: 10481 case OMPD_begin_declare_variant: 10482 case OMPD_end_declare_variant: 10483 case OMPD_declare_target: 10484 case OMPD_end_declare_target: 10485 case OMPD_declare_reduction: 10486 case OMPD_declare_mapper: 10487 case OMPD_taskloop: 10488 case OMPD_taskloop_simd: 10489 case OMPD_master_taskloop: 10490 case OMPD_master_taskloop_simd: 10491 case OMPD_parallel_master_taskloop: 10492 case OMPD_parallel_master_taskloop_simd: 10493 case OMPD_requires: 10494 case OMPD_unknown: 10495 default: 10496 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10497 } 10498 return; 10499 } 10500 10501 if (const auto *E = 
dyn_cast<OMPExecutableDirective>(S)) { 10502 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10503 return; 10504 10505 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10506 return; 10507 } 10508 10509 // If this is a lambda function, look into its body. 10510 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10511 S = L->getBody(); 10512 10513 // Keep looking for target regions recursively. 10514 for (const Stmt *II : S->children()) 10515 scanForTargetRegionsFunctions(II, ParentName); 10516 } 10517 10518 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10519 // If emitting code for the host, we do not process FD here. Instead we do 10520 // the normal code generation. 10521 if (!CGM.getLangOpts().OpenMPIsDevice) { 10522 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10523 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10524 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10525 // Do not emit device_type(nohost) functions for the host. 10526 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10527 return true; 10528 } 10529 return false; 10530 } 10531 10532 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10533 // Try to detect target regions in the function. 10534 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10535 StringRef Name = CGM.getMangledName(GD); 10536 scanForTargetRegionsFunctions(FD->getBody(), Name); 10537 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10538 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10539 // Do not emit device_type(nohost) functions for the host. 10540 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10541 return true; 10542 } 10543 10544 // Do not to emit function if it is not marked as declare target. 
10545 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10546 AlreadyEmittedTargetDecls.count(VD) == 0; 10547 } 10548 10549 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10550 if (!CGM.getLangOpts().OpenMPIsDevice) 10551 return false; 10552 10553 // Check if there are Ctors/Dtors in this declaration and look for target 10554 // regions in it. We use the complete variant to produce the kernel name 10555 // mangling. 10556 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10557 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10558 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10559 StringRef ParentName = 10560 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10561 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10562 } 10563 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10564 StringRef ParentName = 10565 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10566 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10567 } 10568 } 10569 10570 // Do not to emit variable if it is not marked as declare target. 
10571 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10572 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10573 cast<VarDecl>(GD.getDecl())); 10574 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10575 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10576 HasRequiresUnifiedSharedMemory)) { 10577 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10578 return true; 10579 } 10580 return false; 10581 } 10582 10583 llvm::Constant * 10584 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10585 const VarDecl *VD) { 10586 assert(VD->getType().isConstant(CGM.getContext()) && 10587 "Expected constant variable."); 10588 StringRef VarName; 10589 llvm::Constant *Addr; 10590 llvm::GlobalValue::LinkageTypes Linkage; 10591 QualType Ty = VD->getType(); 10592 SmallString<128> Buffer; 10593 { 10594 unsigned DeviceID; 10595 unsigned FileID; 10596 unsigned Line; 10597 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10598 FileID, Line); 10599 llvm::raw_svector_ostream OS(Buffer); 10600 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10601 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10602 VarName = OS.str(); 10603 } 10604 Linkage = llvm::GlobalValue::InternalLinkage; 10605 Addr = 10606 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10607 getDefaultFirstprivateAddressSpace()); 10608 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10609 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10610 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10611 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10612 VarName, Addr, VarSize, 10613 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10614 return Addr; 10615 } 10616 10617 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10618 llvm::Constant *Addr) { 10619 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10620 
!CGM.getLangOpts().OpenMPIsDevice) 10621 return; 10622 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10623 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10624 if (!Res) { 10625 if (CGM.getLangOpts().OpenMPIsDevice) { 10626 // Register non-target variables being emitted in device code (debug info 10627 // may cause this). 10628 StringRef VarName = CGM.getMangledName(VD); 10629 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10630 } 10631 return; 10632 } 10633 // Register declare target variables. 10634 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10635 StringRef VarName; 10636 CharUnits VarSize; 10637 llvm::GlobalValue::LinkageTypes Linkage; 10638 10639 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10640 !HasRequiresUnifiedSharedMemory) { 10641 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10642 VarName = CGM.getMangledName(VD); 10643 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10644 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10645 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10646 } else { 10647 VarSize = CharUnits::Zero(); 10648 } 10649 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10650 // Temp solution to prevent optimizations of the internal variables. 
10651 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10652 std::string RefName = getName({VarName, "ref"}); 10653 if (!CGM.GetGlobalValue(RefName)) { 10654 llvm::Constant *AddrRef = 10655 getOrCreateInternalVariable(Addr->getType(), RefName); 10656 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10657 GVAddrRef->setConstant(/*Val=*/true); 10658 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10659 GVAddrRef->setInitializer(Addr); 10660 CGM.addCompilerUsedGlobal(GVAddrRef); 10661 } 10662 } 10663 } else { 10664 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10665 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10666 HasRequiresUnifiedSharedMemory)) && 10667 "Declare target attribute must link or to with unified memory."); 10668 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10669 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10670 else 10671 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10672 10673 if (CGM.getLangOpts().OpenMPIsDevice) { 10674 VarName = Addr->getName(); 10675 Addr = nullptr; 10676 } else { 10677 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10678 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10679 } 10680 VarSize = CGM.getPointerSize(); 10681 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10682 } 10683 10684 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10685 VarName, Addr, VarSize, Flags, Linkage); 10686 } 10687 10688 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10689 if (isa<FunctionDecl>(GD.getDecl()) || 10690 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10691 return emitTargetFunctions(GD); 10692 10693 return emitTargetGlobalVariable(GD); 10694 } 10695 10696 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10697 for (const VarDecl *VD : DeferredGlobalVariables) { 10698 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10699 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10700 if (!Res) 
10701 continue; 10702 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10703 !HasRequiresUnifiedSharedMemory) { 10704 CGM.EmitGlobal(VD); 10705 } else { 10706 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10707 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10708 HasRequiresUnifiedSharedMemory)) && 10709 "Expected link clause or to clause with unified memory."); 10710 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10711 } 10712 } 10713 } 10714 10715 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10716 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10717 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10718 " Expected target-based directive."); 10719 } 10720 10721 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10722 for (const OMPClause *Clause : D->clauselists()) { 10723 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10724 HasRequiresUnifiedSharedMemory = true; 10725 } else if (const auto *AC = 10726 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10727 switch (AC->getAtomicDefaultMemOrderKind()) { 10728 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10729 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10730 break; 10731 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10732 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10733 break; 10734 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10735 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10736 break; 10737 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10738 break; 10739 } 10740 } 10741 } 10742 } 10743 10744 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10745 return RequiresAtomicOrdering; 10746 } 10747 10748 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10749 LangAS &AS) { 10750 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10751 return false; 10752 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10753 
switch(A->getAllocatorType()) { 10754 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10755 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10756 // Not supported, fallback to the default mem space. 10757 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10758 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10759 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10760 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10761 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10762 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10763 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10764 AS = LangAS::Default; 10765 return true; 10766 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10767 llvm_unreachable("Expected predefined allocator for the variables with the " 10768 "static storage."); 10769 } 10770 return false; 10771 } 10772 10773 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10774 return HasRequiresUnifiedSharedMemory; 10775 } 10776 10777 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10778 CodeGenModule &CGM) 10779 : CGM(CGM) { 10780 if (CGM.getLangOpts().OpenMPIsDevice) { 10781 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10782 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10783 } 10784 } 10785 10786 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10787 if (CGM.getLangOpts().OpenMPIsDevice) 10788 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10789 } 10790 10791 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10792 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10793 return true; 10794 10795 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10796 // Do not to emit function if it is marked as declare target as it was already 10797 // emitted. 
10798 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10799 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10800 if (auto *F = dyn_cast_or_null<llvm::Function>( 10801 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10802 return !F->isDeclaration(); 10803 return false; 10804 } 10805 return true; 10806 } 10807 10808 return !AlreadyEmittedTargetDecls.insert(D).second; 10809 } 10810 10811 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10812 // If we don't have entries or if we are emitting code for the device, we 10813 // don't need to do anything. 10814 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10815 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10816 (OffloadEntriesInfoManager.empty() && 10817 !HasEmittedDeclareTargetRegion && 10818 !HasEmittedTargetRegion)) 10819 return nullptr; 10820 10821 // Create and register the function that handles the requires directives. 10822 ASTContext &C = CGM.getContext(); 10823 10824 llvm::Function *RequiresRegFn; 10825 { 10826 CodeGenFunction CGF(CGM); 10827 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10828 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10829 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10830 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10831 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10832 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10833 // TODO: check for other requires clauses. 10834 // The requires directive takes effect only when a target region is 10835 // present in the compilation unit. Otherwise it is ignored and not 10836 // passed to the runtime. This avoids the runtime from throwing an error 10837 // for mismatching requires clauses across compilation units that don't 10838 // contain at least 1 target region. 
10839 assert((HasEmittedTargetRegion || 10840 HasEmittedDeclareTargetRegion || 10841 !OffloadEntriesInfoManager.empty()) && 10842 "Target or declare target region expected."); 10843 if (HasRequiresUnifiedSharedMemory) 10844 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10845 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10846 CGM.getModule(), OMPRTL___tgt_register_requires), 10847 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10848 CGF.FinishFunction(); 10849 } 10850 return RequiresRegFn; 10851 } 10852 10853 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10854 const OMPExecutableDirective &D, 10855 SourceLocation Loc, 10856 llvm::Function *OutlinedFn, 10857 ArrayRef<llvm::Value *> CapturedVars) { 10858 if (!CGF.HaveInsertPoint()) 10859 return; 10860 10861 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10862 CodeGenFunction::RunCleanupsScope Scope(CGF); 10863 10864 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10865 llvm::Value *Args[] = { 10866 RTLoc, 10867 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10868 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10869 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10870 RealArgs.append(std::begin(Args), std::end(Args)); 10871 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10872 10873 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10874 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10875 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10876 } 10877 10878 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10879 const Expr *NumTeams, 10880 const Expr *ThreadLimit, 10881 SourceLocation Loc) { 10882 if (!CGF.HaveInsertPoint()) 10883 return; 10884 10885 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10886 10887 llvm::Value *NumTeamsVal = 10888 NumTeams 10889 ? 
// num_teams operand: the clause expression cast to Int32Ty (as a signed
// value), or the constant 0 when no expression was supplied.
            CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // thread_limit operand, built the same way as the num_teams operand.
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emits the runtime calls that open and close a target data region around
/// the region body produced by \p CodeGen.
///
/// The opening call is __tgt_target_data_begin_mapper and the closing call is
/// __tgt_target_data_end_mapper. With an if clause both calls are emitted
/// through emitIfClause. The body is emitted once, between the two calls and
/// with privatization turned off, when Info.CaptureDeviceAddrMap is empty;
/// otherwise it is emitted from the opening step (see BeginThenGen and
/// BeginElseGen).
///
/// \param CGF Function being generated; nothing is emitted without an insert
/// point.
/// \param D Directive supplying the map information and source location.
/// \param IfCond Condition of the if clause, or null.
/// \param Device Device expression, or null to pass OMP_DEVICEID_UNDEF.
/// \param CodeGen Generator of the region body.
/// \param Info Offloading arrays, filled by the opening step and reused by
/// the closing step.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; it is passed as a 64-bit value.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. The argument list
  // mirrors the one built for the opening call.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any. The device expression is evaluated again here
    // rather than reusing the value computed for the opening call.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the region: conditional when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the region: conditional when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emits the runtime call for a standalone data directive. \p D must be a
/// target enter data, target exit data, or target update directive (checked
/// by the assertion below); nothing is emitted without an insert point.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State captured by reference by the ThenGen lambda below.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
11079 llvm::Value *DeviceID = nullptr; 11080 if (Device) { 11081 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11082 CGF.Int64Ty, /*isSigned=*/true); 11083 } else { 11084 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11085 } 11086 11087 // Emit the number of elements in the offloading arrays. 11088 llvm::Constant *PointerNum = 11089 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11090 11091 // Source location for the ident struct 11092 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11093 11094 llvm::Value *OffloadingArgs[] = {RTLoc, 11095 DeviceID, 11096 PointerNum, 11097 InputInfo.BasePointersArray.getPointer(), 11098 InputInfo.PointersArray.getPointer(), 11099 InputInfo.SizesArray.getPointer(), 11100 MapTypesArray, 11101 MapNamesArray, 11102 InputInfo.MappersArray.getPointer()}; 11103 11104 // Select the right runtime function call for each standalone 11105 // directive. 11106 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11107 RuntimeFunction RTLFn; 11108 switch (D.getDirectiveKind()) { 11109 case OMPD_target_enter_data: 11110 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11111 : OMPRTL___tgt_target_data_begin_mapper; 11112 break; 11113 case OMPD_target_exit_data: 11114 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11115 : OMPRTL___tgt_target_data_end_mapper; 11116 break; 11117 case OMPD_target_update: 11118 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11119 : OMPRTL___tgt_target_data_update_mapper; 11120 break; 11121 case OMPD_parallel: 11122 case OMPD_for: 11123 case OMPD_parallel_for: 11124 case OMPD_parallel_master: 11125 case OMPD_parallel_sections: 11126 case OMPD_for_simd: 11127 case OMPD_parallel_for_simd: 11128 case OMPD_cancel: 11129 case OMPD_cancellation_point: 11130 case OMPD_ordered: 11131 case OMPD_threadprivate: 11132 case OMPD_allocate: 11133 case OMPD_task: 11134 case OMPD_simd: 11135 case OMPD_tile: 11136 case OMPD_sections: 11137 case OMPD_section: 11138 case OMPD_single: 11139 case OMPD_master: 11140 case OMPD_critical: 11141 case OMPD_taskyield: 11142 case OMPD_barrier: 11143 case OMPD_taskwait: 11144 case OMPD_taskgroup: 11145 case OMPD_atomic: 11146 case OMPD_flush: 11147 case OMPD_depobj: 11148 case OMPD_scan: 11149 case OMPD_teams: 11150 case OMPD_target_data: 11151 case OMPD_distribute: 11152 case OMPD_distribute_simd: 11153 case OMPD_distribute_parallel_for: 11154 case OMPD_distribute_parallel_for_simd: 11155 case OMPD_teams_distribute: 11156 case OMPD_teams_distribute_simd: 11157 case OMPD_teams_distribute_parallel_for: 11158 case OMPD_teams_distribute_parallel_for_simd: 11159 case OMPD_declare_simd: 11160 case OMPD_declare_variant: 11161 case OMPD_begin_declare_variant: 11162 case OMPD_end_declare_variant: 11163 case OMPD_declare_target: 11164 case OMPD_end_declare_target: 11165 case OMPD_declare_reduction: 11166 case OMPD_declare_mapper: 11167 case OMPD_taskloop: 11168 case OMPD_taskloop_simd: 11169 case OMPD_master_taskloop: 11170 case OMPD_master_taskloop_simd: 11171 case OMPD_parallel_master_taskloop: 11172 case OMPD_parallel_master_taskloop_simd: 11173 case OMPD_target: 11174 case OMPD_target_simd: 11175 case OMPD_target_teams_distribute: 11176 case OMPD_target_teams_distribute_simd: 11177 case OMPD_target_teams_distribute_parallel_for: 11178 case OMPD_target_teams_distribute_parallel_for_simd: 11179 case OMPD_target_teams: 11180 
case OMPD_target_parallel: 11181 case OMPD_target_parallel_for: 11182 case OMPD_target_parallel_for_simd: 11183 case OMPD_requires: 11184 case OMPD_unknown: 11185 default: 11186 llvm_unreachable("Unexpected standalone target data directive."); 11187 break; 11188 } 11189 CGF.EmitRuntimeCall( 11190 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11191 OffloadingArgs); 11192 }; 11193 11194 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11195 &MapNamesArray](CodeGenFunction &CGF, 11196 PrePostActionTy &) { 11197 // Fill up the arrays with all the mapped variables. 11198 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11199 11200 // Get map clause information. 11201 MappableExprsHandler MEHandler(D, CGF); 11202 MEHandler.generateAllInfo(CombinedInfo); 11203 11204 TargetDataInfo Info; 11205 // Fill up the arrays and create the arguments. 11206 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11207 /*IsNonContiguous=*/true); 11208 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11209 D.hasClausesOfKind<OMPNowaitClause>(); 11210 emitOffloadingArraysArgument( 11211 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11212 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11213 {/*ForEndTask=*/false}); 11214 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11215 InputInfo.BasePointersArray = 11216 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11217 InputInfo.PointersArray = 11218 Address(Info.PointersArray, CGM.getPointerAlign()); 11219 InputInfo.SizesArray = 11220 Address(Info.SizesArray, CGM.getPointerAlign()); 11221 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11222 MapTypesArray = Info.MapTypesArray; 11223 MapNamesArray = Info.MapNamesArray; 11224 if (RequiresOuterTask) 11225 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11226 else 11227 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11228 }; 11229 11230 
if (IfCond) { 11231 emitIfClause(CGF, IfCond, TargetThenGen, 11232 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11233 } else { 11234 RegionCodeGenTy ThenRCG(TargetThenGen); 11235 ThenRCG(CGF); 11236 } 11237 } 11238 11239 namespace { 11240 /// Kind of parameter in a function with 'declare simd' directive. 11241 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11242 /// Attribute set of the parameter. 11243 struct ParamAttrTy { 11244 ParamKindTy Kind = Vector; 11245 llvm::APSInt StrideOrArg; 11246 llvm::APSInt Alignment; 11247 }; 11248 } // namespace 11249 11250 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11251 ArrayRef<ParamAttrTy> ParamAttrs) { 11252 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11253 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11254 // of that clause. The VLEN value must be power of 2. 11255 // In other case the notion of the function`s "characteristic data type" (CDT) 11256 // is used to compute the vector length. 11257 // CDT is defined in the following order: 11258 // a) For non-void function, the CDT is the return type. 11259 // b) If the function has any non-uniform, non-linear parameters, then the 11260 // CDT is the type of the first such parameter. 11261 // c) If the CDT determined by a) or b) above is struct, union, or class 11262 // type which is pass-by-value (except for the type that maps to the 11263 // built-in complex data type), the characteristic data type is int. 11264 // d) If none of the above three cases is applicable, the CDT is int. 11265 // The VLEN is then determined based on the CDT and the size of vector 11266 // register of that ISA for which current vector version is generated. The 11267 // VLEN is computed using the formula below: 11268 // VLEN = sizeof(vector_register) / sizeof(CDT), 11269 // where vector register size specified in section 3.2.1 Registers and the 11270 // Stack Frame of original AMD64 ABI document. 
11271 QualType RetType = FD->getReturnType(); 11272 if (RetType.isNull()) 11273 return 0; 11274 ASTContext &C = FD->getASTContext(); 11275 QualType CDT; 11276 if (!RetType.isNull() && !RetType->isVoidType()) { 11277 CDT = RetType; 11278 } else { 11279 unsigned Offset = 0; 11280 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11281 if (ParamAttrs[Offset].Kind == Vector) 11282 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11283 ++Offset; 11284 } 11285 if (CDT.isNull()) { 11286 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11287 if (ParamAttrs[I + Offset].Kind == Vector) { 11288 CDT = FD->getParamDecl(I)->getType(); 11289 break; 11290 } 11291 } 11292 } 11293 } 11294 if (CDT.isNull()) 11295 CDT = C.IntTy; 11296 CDT = CDT->getCanonicalTypeUnqualified(); 11297 if (CDT->isRecordType() || CDT->isUnionType()) 11298 CDT = C.IntTy; 11299 return C.getTypeSize(CDT); 11300 } 11301 11302 static void 11303 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11304 const llvm::APSInt &VLENVal, 11305 ArrayRef<ParamAttrTy> ParamAttrs, 11306 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11307 struct ISADataTy { 11308 char ISA; 11309 unsigned VecRegSize; 11310 }; 11311 ISADataTy ISAData[] = { 11312 { 11313 'b', 128 11314 }, // SSE 11315 { 11316 'c', 256 11317 }, // AVX 11318 { 11319 'd', 256 11320 }, // AVX2 11321 { 11322 'e', 512 11323 }, // AVX512 11324 }; 11325 llvm::SmallVector<char, 2> Masked; 11326 switch (State) { 11327 case OMPDeclareSimdDeclAttr::BS_Undefined: 11328 Masked.push_back('N'); 11329 Masked.push_back('M'); 11330 break; 11331 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11332 Masked.push_back('N'); 11333 break; 11334 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11335 Masked.push_back('M'); 11336 break; 11337 } 11338 for (char Mask : Masked) { 11339 for (const ISADataTy &Data : ISAData) { 11340 SmallString<256> Buffer; 11341 llvm::raw_svector_ostream Out(Buffer); 11342 Out << "_ZGV" << Data.ISA << Mask; 11343 if (!VLENVal) 
{ 11344 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11345 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11346 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11347 } else { 11348 Out << VLENVal; 11349 } 11350 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11351 switch (ParamAttr.Kind){ 11352 case LinearWithVarStride: 11353 Out << 's' << ParamAttr.StrideOrArg; 11354 break; 11355 case Linear: 11356 Out << 'l'; 11357 if (ParamAttr.StrideOrArg != 1) 11358 Out << ParamAttr.StrideOrArg; 11359 break; 11360 case Uniform: 11361 Out << 'u'; 11362 break; 11363 case Vector: 11364 Out << 'v'; 11365 break; 11366 } 11367 if (!!ParamAttr.Alignment) 11368 Out << 'a' << ParamAttr.Alignment; 11369 } 11370 Out << '_' << Fn->getName(); 11371 Fn->addFnAttr(Out.str()); 11372 } 11373 } 11374 } 11375 11376 // This are the Functions that are needed to mangle the name of the 11377 // vector functions generated by the compiler, according to the rules 11378 // defined in the "Vector Function ABI specifications for AArch64", 11379 // available at 11380 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11381 11382 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11383 /// 11384 /// TODO: Need to implement the behavior for reference marked with a 11385 /// var or no linear modifiers (1.b in the section). For this, we 11386 /// need to extend ParamKindTy to support the linear modifiers. 
11387 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11388 QT = QT.getCanonicalType(); 11389 11390 if (QT->isVoidType()) 11391 return false; 11392 11393 if (Kind == ParamKindTy::Uniform) 11394 return false; 11395 11396 if (Kind == ParamKindTy::Linear) 11397 return false; 11398 11399 // TODO: Handle linear references with modifiers 11400 11401 if (Kind == ParamKindTy::LinearWithVarStride) 11402 return false; 11403 11404 return true; 11405 } 11406 11407 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11408 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11409 QT = QT.getCanonicalType(); 11410 unsigned Size = C.getTypeSize(QT); 11411 11412 // Only scalars and complex within 16 bytes wide set PVB to true. 11413 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11414 return false; 11415 11416 if (QT->isFloatingType()) 11417 return true; 11418 11419 if (QT->isIntegerType()) 11420 return true; 11421 11422 if (QT->isPointerType()) 11423 return true; 11424 11425 // TODO: Add support for complex types (section 3.1.2, item 2). 11426 11427 return false; 11428 } 11429 11430 /// Computes the lane size (LS) of a return type or of an input parameter, 11431 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11432 /// TODO: Add support for references, section 3.2.1, item 1. 11433 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11434 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11435 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11436 if (getAArch64PBV(PTy, C)) 11437 return C.getTypeSize(PTy); 11438 } 11439 if (getAArch64PBV(QT, C)) 11440 return C.getTypeSize(QT); 11441 11442 return C.getTypeSize(C.getUIntPtrType()); 11443 } 11444 11445 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11446 // signature of the scalar function, as defined in 3.2.2 of the 11447 // AAVFABI. 
11448 static std::tuple<unsigned, unsigned, bool> 11449 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11450 QualType RetType = FD->getReturnType().getCanonicalType(); 11451 11452 ASTContext &C = FD->getASTContext(); 11453 11454 bool OutputBecomesInput = false; 11455 11456 llvm::SmallVector<unsigned, 8> Sizes; 11457 if (!RetType->isVoidType()) { 11458 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11459 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11460 OutputBecomesInput = true; 11461 } 11462 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11463 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11464 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11465 } 11466 11467 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11468 // The LS of a function parameter / return value can only be a power 11469 // of 2, starting from 8 bits, up to 128. 11470 assert(std::all_of(Sizes.begin(), Sizes.end(), 11471 [](unsigned Size) { 11472 return Size == 8 || Size == 16 || Size == 32 || 11473 Size == 64 || Size == 128; 11474 }) && 11475 "Invalid size"); 11476 11477 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11478 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11479 OutputBecomesInput); 11480 } 11481 11482 /// Mangle the parameter part of the vector function name according to 11483 /// their OpenMP classification. The mangling function is defined in 11484 /// section 3.5 of the AAVFABI. 11485 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11486 SmallString<256> Buffer; 11487 llvm::raw_svector_ostream Out(Buffer); 11488 for (const auto &ParamAttr : ParamAttrs) { 11489 switch (ParamAttr.Kind) { 11490 case LinearWithVarStride: 11491 Out << "ls" << ParamAttr.StrideOrArg; 11492 break; 11493 case Linear: 11494 Out << 'l'; 11495 // Don't print the step value if it is not present or if it is 11496 // equal to 1. 
11497 if (ParamAttr.StrideOrArg != 1) 11498 Out << ParamAttr.StrideOrArg; 11499 break; 11500 case Uniform: 11501 Out << 'u'; 11502 break; 11503 case Vector: 11504 Out << 'v'; 11505 break; 11506 } 11507 11508 if (!!ParamAttr.Alignment) 11509 Out << 'a' << ParamAttr.Alignment; 11510 } 11511 11512 return std::string(Out.str()); 11513 } 11514 11515 // Function used to add the attribute. The parameter `VLEN` is 11516 // templated to allow the use of "x" when targeting scalable functions 11517 // for SVE. 11518 template <typename T> 11519 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11520 char ISA, StringRef ParSeq, 11521 StringRef MangledName, bool OutputBecomesInput, 11522 llvm::Function *Fn) { 11523 SmallString<256> Buffer; 11524 llvm::raw_svector_ostream Out(Buffer); 11525 Out << Prefix << ISA << LMask << VLEN; 11526 if (OutputBecomesInput) 11527 Out << "v"; 11528 Out << ParSeq << "_" << MangledName; 11529 Fn->addFnAttr(Out.str()); 11530 } 11531 11532 // Helper function to generate the Advanced SIMD names depending on 11533 // the value of the NDS when simdlen is not present. 
11534 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11535 StringRef Prefix, char ISA, 11536 StringRef ParSeq, StringRef MangledName, 11537 bool OutputBecomesInput, 11538 llvm::Function *Fn) { 11539 switch (NDS) { 11540 case 8: 11541 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11542 OutputBecomesInput, Fn); 11543 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11544 OutputBecomesInput, Fn); 11545 break; 11546 case 16: 11547 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11548 OutputBecomesInput, Fn); 11549 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11550 OutputBecomesInput, Fn); 11551 break; 11552 case 32: 11553 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11554 OutputBecomesInput, Fn); 11555 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11556 OutputBecomesInput, Fn); 11557 break; 11558 case 64: 11559 case 128: 11560 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11561 OutputBecomesInput, Fn); 11562 break; 11563 default: 11564 llvm_unreachable("Scalar type is too wide."); 11565 } 11566 } 11567 11568 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11569 static void emitAArch64DeclareSimdFunction( 11570 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11571 ArrayRef<ParamAttrTy> ParamAttrs, 11572 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11573 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11574 11575 // Get basic data for building the vector signature. 11576 const auto Data = getNDSWDS(FD, ParamAttrs); 11577 const unsigned NDS = std::get<0>(Data); 11578 const unsigned WDS = std::get<1>(Data); 11579 const bool OutputBecomesInput = std::get<2>(Data); 11580 11581 // Check the values provided via `simdlen` by the user. 11582 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11583 if (UserVLEN == 1) { 11584 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11585 DiagnosticsEngine::Warning, 11586 "The clause simdlen(1) has no effect when targeting aarch64."); 11587 CGM.getDiags().Report(SLoc, DiagID); 11588 return; 11589 } 11590 11591 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11592 // Advanced SIMD output. 11593 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11594 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11595 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11596 "power of 2 when targeting Advanced SIMD."); 11597 CGM.getDiags().Report(SLoc, DiagID); 11598 return; 11599 } 11600 11601 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11602 // limits. 11603 if (ISA == 's' && UserVLEN != 0) { 11604 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11605 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11606 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11607 "lanes in the architectural constraints " 11608 "for SVE (min is 128-bit, max is " 11609 "2048-bit, by steps of 128-bit)"); 11610 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11611 return; 11612 } 11613 } 11614 11615 // Sort out parameter sequence. 11616 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11617 StringRef Prefix = "_ZGV"; 11618 // Generate simdlen from user input (if any). 11619 if (UserVLEN) { 11620 if (ISA == 's') { 11621 // SVE generates only a masked function. 11622 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11623 OutputBecomesInput, Fn); 11624 } else { 11625 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11626 // Advanced SIMD generates one or two functions, depending on 11627 // the `[not]inbranch` clause. 
11628 switch (State) { 11629 case OMPDeclareSimdDeclAttr::BS_Undefined: 11630 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11631 OutputBecomesInput, Fn); 11632 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11633 OutputBecomesInput, Fn); 11634 break; 11635 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11636 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11637 OutputBecomesInput, Fn); 11638 break; 11639 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11640 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11641 OutputBecomesInput, Fn); 11642 break; 11643 } 11644 } 11645 } else { 11646 // If no user simdlen is provided, follow the AAVFABI rules for 11647 // generating the vector length. 11648 if (ISA == 's') { 11649 // SVE, section 3.4.1, item 1. 11650 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11651 OutputBecomesInput, Fn); 11652 } else { 11653 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11654 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11655 // two vector names depending on the use of the clause 11656 // `[not]inbranch`. 
11657 switch (State) { 11658 case OMPDeclareSimdDeclAttr::BS_Undefined: 11659 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11660 OutputBecomesInput, Fn); 11661 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11662 OutputBecomesInput, Fn); 11663 break; 11664 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11665 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11666 OutputBecomesInput, Fn); 11667 break; 11668 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11669 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11670 OutputBecomesInput, Fn); 11671 break; 11672 } 11673 } 11674 } 11675 } 11676 11677 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11678 llvm::Function *Fn) { 11679 ASTContext &C = CGM.getContext(); 11680 FD = FD->getMostRecentDecl(); 11681 // Map params to their positions in function decl. 11682 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11683 if (isa<CXXMethodDecl>(FD)) 11684 ParamPositions.try_emplace(FD, 0); 11685 unsigned ParamPos = ParamPositions.size(); 11686 for (const ParmVarDecl *P : FD->parameters()) { 11687 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11688 ++ParamPos; 11689 } 11690 while (FD) { 11691 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11692 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11693 // Mark uniform parameters. 11694 for (const Expr *E : Attr->uniforms()) { 11695 E = E->IgnoreParenImpCasts(); 11696 unsigned Pos; 11697 if (isa<CXXThisExpr>(E)) { 11698 Pos = ParamPositions[FD]; 11699 } else { 11700 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11701 ->getCanonicalDecl(); 11702 Pos = ParamPositions[PVD]; 11703 } 11704 ParamAttrs[Pos].Kind = Uniform; 11705 } 11706 // Get alignment info. 
11707 auto NI = Attr->alignments_begin(); 11708 for (const Expr *E : Attr->aligneds()) { 11709 E = E->IgnoreParenImpCasts(); 11710 unsigned Pos; 11711 QualType ParmTy; 11712 if (isa<CXXThisExpr>(E)) { 11713 Pos = ParamPositions[FD]; 11714 ParmTy = E->getType(); 11715 } else { 11716 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11717 ->getCanonicalDecl(); 11718 Pos = ParamPositions[PVD]; 11719 ParmTy = PVD->getType(); 11720 } 11721 ParamAttrs[Pos].Alignment = 11722 (*NI) 11723 ? (*NI)->EvaluateKnownConstInt(C) 11724 : llvm::APSInt::getUnsigned( 11725 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11726 .getQuantity()); 11727 ++NI; 11728 } 11729 // Mark linear parameters. 11730 auto SI = Attr->steps_begin(); 11731 auto MI = Attr->modifiers_begin(); 11732 for (const Expr *E : Attr->linears()) { 11733 E = E->IgnoreParenImpCasts(); 11734 unsigned Pos; 11735 // Rescaling factor needed to compute the linear parameter 11736 // value in the mangled name. 11737 unsigned PtrRescalingFactor = 1; 11738 if (isa<CXXThisExpr>(E)) { 11739 Pos = ParamPositions[FD]; 11740 } else { 11741 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11742 ->getCanonicalDecl(); 11743 Pos = ParamPositions[PVD]; 11744 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11745 PtrRescalingFactor = CGM.getContext() 11746 .getTypeSizeInChars(P->getPointeeType()) 11747 .getQuantity(); 11748 } 11749 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11750 ParamAttr.Kind = Linear; 11751 // Assuming a stride of 1, for `linear` without modifiers. 
11752 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11753 if (*SI) { 11754 Expr::EvalResult Result; 11755 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11756 if (const auto *DRE = 11757 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11758 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11759 ParamAttr.Kind = LinearWithVarStride; 11760 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11761 ParamPositions[StridePVD->getCanonicalDecl()]); 11762 } 11763 } 11764 } else { 11765 ParamAttr.StrideOrArg = Result.Val.getInt(); 11766 } 11767 } 11768 // If we are using a linear clause on a pointer, we need to 11769 // rescale the value of linear_step with the byte size of the 11770 // pointee type. 11771 if (Linear == ParamAttr.Kind) 11772 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11773 ++SI; 11774 ++MI; 11775 } 11776 llvm::APSInt VLENVal; 11777 SourceLocation ExprLoc; 11778 const Expr *VLENExpr = Attr->getSimdlen(); 11779 if (VLENExpr) { 11780 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11781 ExprLoc = VLENExpr->getExprLoc(); 11782 } 11783 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11784 if (CGM.getTriple().isX86()) { 11785 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11786 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11787 unsigned VLEN = VLENVal.getExtValue(); 11788 StringRef MangledName = Fn->getName(); 11789 if (CGM.getTarget().hasFeature("sve")) 11790 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11791 MangledName, 's', 128, Fn, ExprLoc); 11792 if (CGM.getTarget().hasFeature("neon")) 11793 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11794 MangledName, 'n', 128, Fn, ExprLoc); 11795 } 11796 } 11797 FD = FD->getPreviousDecl(); 11798 } 11799 } 11800 11801 namespace { 11802 /// Cleanup action for doacross support. 
11803 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11804 public: 11805 static const int DoacrossFinArgs = 2; 11806 11807 private: 11808 llvm::FunctionCallee RTLFn; 11809 llvm::Value *Args[DoacrossFinArgs]; 11810 11811 public: 11812 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11813 ArrayRef<llvm::Value *> CallArgs) 11814 : RTLFn(RTLFn) { 11815 assert(CallArgs.size() == DoacrossFinArgs); 11816 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11817 } 11818 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11819 if (!CGF.HaveInsertPoint()) 11820 return; 11821 CGF.EmitRuntimeCall(RTLFn, Args); 11822 } 11823 }; 11824 } // namespace 11825 11826 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11827 const OMPLoopDirective &D, 11828 ArrayRef<Expr *> NumIterations) { 11829 if (!CGF.HaveInsertPoint()) 11830 return; 11831 11832 ASTContext &C = CGM.getContext(); 11833 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11834 RecordDecl *RD; 11835 if (KmpDimTy.isNull()) { 11836 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11837 // kmp_int64 lo; // lower 11838 // kmp_int64 up; // upper 11839 // kmp_int64 st; // stride 11840 // }; 11841 RD = C.buildImplicitRecord("kmp_dim"); 11842 RD->startDefinition(); 11843 addFieldToRecordDecl(C, RD, Int64Ty); 11844 addFieldToRecordDecl(C, RD, Int64Ty); 11845 addFieldToRecordDecl(C, RD, Int64Ty); 11846 RD->completeDefinition(); 11847 KmpDimTy = C.getRecordType(RD); 11848 } else { 11849 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11850 } 11851 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11852 QualType ArrayTy = 11853 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11854 11855 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11856 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11857 enum { LowerFD = 0, UpperFD, StrideFD }; 11858 // Fill dims with data. 
11859 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11860 LValue DimsLVal = CGF.MakeAddrLValue( 11861 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11862 // dims.upper = num_iterations; 11863 LValue UpperLVal = CGF.EmitLValueForField( 11864 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11865 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11866 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11867 Int64Ty, NumIterations[I]->getExprLoc()); 11868 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11869 // dims.stride = 1; 11870 LValue StrideLVal = CGF.EmitLValueForField( 11871 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11872 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11873 StrideLVal); 11874 } 11875 11876 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11877 // kmp_int32 num_dims, struct kmp_dim * dims); 11878 llvm::Value *Args[] = { 11879 emitUpdateLocation(CGF, D.getBeginLoc()), 11880 getThreadID(CGF, D.getBeginLoc()), 11881 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11882 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11883 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11884 CGM.VoidPtrTy)}; 11885 11886 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11887 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11888 CGF.EmitRuntimeCall(RTLFn, Args); 11889 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11890 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11891 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11892 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11893 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11894 llvm::makeArrayRef(FiniArgs)); 11895 } 11896 11897 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11898 const OMPDependClause *C) { 11899 QualType Int64Ty = 11900 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11901 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11902 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11903 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11904 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11905 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11906 const Expr *CounterVal = C->getLoopData(I); 11907 assert(CounterVal); 11908 llvm::Value *CntVal = CGF.EmitScalarConversion( 11909 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11910 CounterVal->getExprLoc()); 11911 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11912 /*Volatile=*/false, Int64Ty); 11913 } 11914 llvm::Value *Args[] = { 11915 emitUpdateLocation(CGF, C->getBeginLoc()), 11916 getThreadID(CGF, C->getBeginLoc()), 11917 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11918 llvm::FunctionCallee RTLFn; 11919 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11920 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11921 OMPRTL___kmpc_doacross_post); 11922 } else { 11923 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11924 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11925 OMPRTL___kmpc_doacross_wait); 11926 } 11927 CGF.EmitRuntimeCall(RTLFn, Args); 11928 } 11929 11930 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11931 llvm::FunctionCallee Callee, 11932 ArrayRef<llvm::Value *> Args) const { 11933 assert(Loc.isValid() && "Outlined function call location must be valid."); 11934 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11935 11936 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11937 if (Fn->doesNotThrow()) { 11938 CGF.EmitNounwindRuntimeCall(Fn, Args); 11939 return; 11940 } 11941 } 11942 CGF.EmitRuntimeCall(Callee, Args); 11943 } 11944 11945 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11946 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Simply forwards to emitCall.
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Records that a declare-target function has been emitted: sets
/// HasEmittedDeclareTargetRegion when \p D is a FunctionDecl that is a
/// declare-target declaration. Does nothing for any other declaration.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Returns the address of the local variable for \p NativeParam.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Returns the address to use for local variable \p VD when it needs special
/// OpenMP handling.
///
/// - For a null \p VD an invalid address is returned.
/// - If the canonical declaration carries OMPAllocateDeclAttr and
///   isAllocatableDecl(VD) holds, storage is obtained through a call to
///   __kmpc_alloc and a cleanup calling __kmpc_free is pushed. The result is
///   the "real" untied-task address when one is recorded for \p VD, otherwise
///   the freshly allocated address.
/// - In every other case the untied-task address recorded for \p VD in the
///   current function is returned; it is invalid when nothing is recorded.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the pair of addresses recorded for VD in the untied-task data
  // associated with the function currently being emitted, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so round it up with emitted
      // instructions.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // The size is a compile-time constant; round it up to the alignment
      // here.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Call __kmpc_alloc(ThreadID, Size, Allocator); the result is named
    // "<var>.void.addr".
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied-task slot exists for the variable, store the allocated
    // pointer into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // When run, it emits a call to RTLFn with the thread id, the address
    // cast to void*, and the allocator value converted to void*.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        // Nothing to emit when there is no insertion point.
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied-task address over the new allocation.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For variables tracked in an untied task, emit the untied switch of the
    // enclosing OpenMP region (if the current captured-statement info is one).
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// Returns true if \p VD has an entry in the untied-task local variables map
/// associated with the function currently being emitted by \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// Pushes a new set onto the runtime's NontemporalDeclsStack when \p S has at
/// least one 'nontemporal' clause, and fills it with the declarations named
/// by the private references of those clauses.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Anything that is not a plain declaration reference must be a
        // member access on 'this'.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
ME->getMemberDecl(); 12094 } 12095 DS.insert(VD); 12096 } 12097 } 12098 } 12099 12100 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12101 if (!NeedToPush) 12102 return; 12103 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12104 } 12105 12106 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12107 CodeGenFunction &CGF, 12108 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 12109 std::pair<Address, Address>> &LocalVars) 12110 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12111 if (!NeedToPush) 12112 return; 12113 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12114 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12115 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12116 } 12117 12118 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12119 if (!NeedToPush) 12120 return; 12121 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12122 } 12123 12124 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12125 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12126 12127 return llvm::any_of( 12128 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12129 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12130 } 12131 12132 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12133 const OMPExecutableDirective &S, 12134 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12135 const { 12136 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12137 // Vars in target/task regions must be excluded completely. 
12138 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12139 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12140 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12141 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12142 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12143 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12144 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12145 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12146 } 12147 } 12148 // Exclude vars in private clauses. 12149 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12150 for (const Expr *Ref : C->varlists()) { 12151 if (!Ref->getType()->isScalarType()) 12152 continue; 12153 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12154 if (!DRE) 12155 continue; 12156 NeedToCheckForLPCs.insert(DRE->getDecl()); 12157 } 12158 } 12159 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12160 for (const Expr *Ref : C->varlists()) { 12161 if (!Ref->getType()->isScalarType()) 12162 continue; 12163 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12164 if (!DRE) 12165 continue; 12166 NeedToCheckForLPCs.insert(DRE->getDecl()); 12167 } 12168 } 12169 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12170 for (const Expr *Ref : C->varlists()) { 12171 if (!Ref->getType()->isScalarType()) 12172 continue; 12173 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12174 if (!DRE) 12175 continue; 12176 NeedToCheckForLPCs.insert(DRE->getDecl()); 12177 } 12178 } 12179 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12180 for (const Expr *Ref : C->varlists()) { 12181 if (!Ref->getType()->isScalarType()) 12182 continue; 12183 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12184 if (!DRE) 12185 continue; 12186 NeedToCheckForLPCs.insert(DRE->getDecl()); 12187 } 12188 } 12189 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12190 for (const Expr *Ref : C->varlists()) { 12191 if (!Ref->getType()->isScalarType()) 12192 continue; 12193 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12194 if (!DRE) 12195 continue; 12196 NeedToCheckForLPCs.insert(DRE->getDecl()); 12197 } 12198 } 12199 for (const Decl *VD : NeedToCheckForLPCs) { 12200 for (const LastprivateConditionalData &Data : 12201 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12202 if (Data.DeclToUniqueName.count(VD) > 0) { 12203 if (!Data.Disabled) 12204 NeedToAddForLPCsAsDisabled.insert(VD); 12205 break; 12206 } 12207 } 12208 } 12209 } 12210 12211 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12212 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12213 : CGM(CGF.CGM), 12214 Action((CGM.getLangOpts().OpenMP >= 50 && 12215 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12216 [](const OMPLastprivateClause *C) { 12217 return C->getKind() == 12218 OMPC_LASTPRIVATE_conditional; 12219 })) 12220 ? 
ActionToDo::PushAsLastprivateConditional 12221 : ActionToDo::DoNotPush) { 12222 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12223 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12224 return; 12225 assert(Action == ActionToDo::PushAsLastprivateConditional && 12226 "Expected a push action."); 12227 LastprivateConditionalData &Data = 12228 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12229 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12230 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12231 continue; 12232 12233 for (const Expr *Ref : C->varlists()) { 12234 Data.DeclToUniqueName.insert(std::make_pair( 12235 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12236 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12237 } 12238 } 12239 Data.IVLVal = IVLVal; 12240 Data.Fn = CGF.CurFn; 12241 } 12242 12243 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12244 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12245 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12246 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12247 if (CGM.getLangOpts().OpenMP < 50) 12248 return; 12249 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12250 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12251 if (!NeedToAddForLPCsAsDisabled.empty()) { 12252 Action = ActionToDo::DisableLastprivateConditional; 12253 LastprivateConditionalData &Data = 12254 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12255 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12256 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12257 Data.Fn = CGF.CurFn; 12258 Data.Disabled = true; 12259 } 12260 } 12261 12262 CGOpenMPRuntime::LastprivateConditionalRAII 12263 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12264 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12265 return 
LastprivateConditionalRAII(CGF, S); 12266 } 12267 12268 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12269 if (CGM.getLangOpts().OpenMP < 50) 12270 return; 12271 if (Action == ActionToDo::DisableLastprivateConditional) { 12272 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12273 "Expected list of disabled private vars."); 12274 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12275 } 12276 if (Action == ActionToDo::PushAsLastprivateConditional) { 12277 assert( 12278 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12279 "Expected list of lastprivate conditional vars."); 12280 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12281 } 12282 } 12283 12284 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12285 const VarDecl *VD) { 12286 ASTContext &C = CGM.getContext(); 12287 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12288 if (I == LastprivateConditionalToTypes.end()) 12289 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12290 QualType NewType; 12291 const FieldDecl *VDField; 12292 const FieldDecl *FiredField; 12293 LValue BaseLVal; 12294 auto VI = I->getSecond().find(VD); 12295 if (VI == I->getSecond().end()) { 12296 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12297 RD->startDefinition(); 12298 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12299 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12300 RD->completeDefinition(); 12301 NewType = C.getRecordType(RD); 12302 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12303 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12304 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12305 } else { 12306 NewType = std::get<0>(VI->getSecond()); 12307 VDField = std::get<1>(VI->getSecond()); 12308 FiredField = std::get<2>(VI->getSecond()); 
12309 BaseLVal = std::get<3>(VI->getSecond()); 12310 } 12311 LValue FiredLVal = 12312 CGF.EmitLValueForField(BaseLVal, FiredField); 12313 CGF.EmitStoreOfScalar( 12314 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12315 FiredLVal); 12316 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12317 } 12318 12319 namespace { 12320 /// Checks if the lastprivate conditional variable is referenced in LHS. 12321 class LastprivateConditionalRefChecker final 12322 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12323 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12324 const Expr *FoundE = nullptr; 12325 const Decl *FoundD = nullptr; 12326 StringRef UniqueDeclName; 12327 LValue IVLVal; 12328 llvm::Function *FoundFn = nullptr; 12329 SourceLocation Loc; 12330 12331 public: 12332 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12333 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12334 llvm::reverse(LPM)) { 12335 auto It = D.DeclToUniqueName.find(E->getDecl()); 12336 if (It == D.DeclToUniqueName.end()) 12337 continue; 12338 if (D.Disabled) 12339 return false; 12340 FoundE = E; 12341 FoundD = E->getDecl()->getCanonicalDecl(); 12342 UniqueDeclName = It->second; 12343 IVLVal = D.IVLVal; 12344 FoundFn = D.Fn; 12345 break; 12346 } 12347 return FoundE == E; 12348 } 12349 bool VisitMemberExpr(const MemberExpr *E) { 12350 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12351 return false; 12352 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12353 llvm::reverse(LPM)) { 12354 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12355 if (It == D.DeclToUniqueName.end()) 12356 continue; 12357 if (D.Disabled) 12358 return false; 12359 FoundE = E; 12360 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12361 UniqueDeclName = It->second; 12362 IVLVal = D.IVLVal; 12363 FoundFn = D.Fn; 12364 break; 12365 } 12366 return FoundE == E; 12367 } 12368 bool VisitStmt(const Stmt *S) { 12369 for (const Stmt 
*Child : S->children()) { 12370 if (!Child) 12371 continue; 12372 if (const auto *E = dyn_cast<Expr>(Child)) 12373 if (!E->isGLValue()) 12374 continue; 12375 if (Visit(Child)) 12376 return true; 12377 } 12378 return false; 12379 } 12380 explicit LastprivateConditionalRefChecker( 12381 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12382 : LPM(LPM) {} 12383 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12384 getFoundData() const { 12385 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12386 } 12387 }; 12388 } // namespace 12389 12390 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12391 LValue IVLVal, 12392 StringRef UniqueDeclName, 12393 LValue LVal, 12394 SourceLocation Loc) { 12395 // Last updated loop counter for the lastprivate conditional var. 12396 // int<xx> last_iv = 0; 12397 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12398 llvm::Constant *LastIV = 12399 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12400 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12401 IVLVal.getAlignment().getAsAlign()); 12402 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12403 12404 // Last value of the lastprivate conditional. 12405 // decltype(priv_a) last_a; 12406 llvm::Constant *Last = getOrCreateInternalVariable( 12407 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12408 cast<llvm::GlobalVariable>(Last)->setAlignment( 12409 LVal.getAlignment().getAsAlign()); 12410 LValue LastLVal = 12411 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12412 12413 // Global loop counter. Required to handle inner parallel-for regions. 
12414 // iv 12415 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12416 12417 // #pragma omp critical(a) 12418 // if (last_iv <= iv) { 12419 // last_iv = iv; 12420 // last_a = priv_a; 12421 // } 12422 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12423 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12424 Action.Enter(CGF); 12425 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12426 // (last_iv <= iv) ? Check if the variable is updated and store new 12427 // value in global var. 12428 llvm::Value *CmpRes; 12429 if (IVLVal.getType()->isSignedIntegerType()) { 12430 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12431 } else { 12432 assert(IVLVal.getType()->isUnsignedIntegerType() && 12433 "Loop iteration variable must be integer."); 12434 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12435 } 12436 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12437 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12438 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12439 // { 12440 CGF.EmitBlock(ThenBB); 12441 12442 // last_iv = iv; 12443 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12444 12445 // last_a = priv_a; 12446 switch (CGF.getEvaluationKind(LVal.getType())) { 12447 case TEK_Scalar: { 12448 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12449 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12450 break; 12451 } 12452 case TEK_Complex: { 12453 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12454 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12455 break; 12456 } 12457 case TEK_Aggregate: 12458 llvm_unreachable( 12459 "Aggregates are not supported in lastprivate conditional."); 12460 } 12461 // } 12462 CGF.EmitBranch(ExitBB); 12463 // There is no need to emit line number for unconditional branch. 
12464 (void)ApplyDebugLocation::CreateEmpty(CGF); 12465 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12466 }; 12467 12468 if (CGM.getLangOpts().OpenMPSimd) { 12469 // Do not emit as a critical region as no parallel region could be emitted. 12470 RegionCodeGenTy ThenRCG(CodeGen); 12471 ThenRCG(CGF); 12472 } else { 12473 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12474 } 12475 } 12476 12477 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12478 const Expr *LHS) { 12479 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12480 return; 12481 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12482 if (!Checker.Visit(LHS)) 12483 return; 12484 const Expr *FoundE; 12485 const Decl *FoundD; 12486 StringRef UniqueDeclName; 12487 LValue IVLVal; 12488 llvm::Function *FoundFn; 12489 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12490 Checker.getFoundData(); 12491 if (FoundFn != CGF.CurFn) { 12492 // Special codegen for inner parallel regions. 
12493 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12494 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12495 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12496 "Lastprivate conditional is not found in outer region."); 12497 QualType StructTy = std::get<0>(It->getSecond()); 12498 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12499 LValue PrivLVal = CGF.EmitLValue(FoundE); 12500 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12501 PrivLVal.getAddress(CGF), 12502 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12503 LValue BaseLVal = 12504 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12505 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12506 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12507 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12508 FiredLVal, llvm::AtomicOrdering::Unordered, 12509 /*IsVolatile=*/true, /*isInit=*/false); 12510 return; 12511 } 12512 12513 // Private address of the lastprivate conditional in the current context. 
12514 // priv_a 12515 LValue LVal = CGF.EmitLValue(FoundE); 12516 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12517 FoundE->getExprLoc()); 12518 } 12519 12520 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12521 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12522 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12523 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12524 return; 12525 auto Range = llvm::reverse(LastprivateConditionalStack); 12526 auto It = llvm::find_if( 12527 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12528 if (It == Range.end() || It->Fn != CGF.CurFn) 12529 return; 12530 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12531 assert(LPCI != LastprivateConditionalToTypes.end() && 12532 "Lastprivates must be registered already."); 12533 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12534 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12535 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12536 for (const auto &Pair : It->DeclToUniqueName) { 12537 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12538 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12539 continue; 12540 auto I = LPCI->getSecond().find(Pair.first); 12541 assert(I != LPCI->getSecond().end() && 12542 "Lastprivate must be rehistered already."); 12543 // bool Cmp = priv_a.Fired != 0; 12544 LValue BaseLVal = std::get<3>(I->getSecond()); 12545 LValue FiredLVal = 12546 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12547 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12548 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12549 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12550 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12551 // if (Cmp) { 12552 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12553 CGF.EmitBlock(ThenBB); 
12554 Address Addr = CGF.GetAddrOfLocalVar(VD); 12555 LValue LVal; 12556 if (VD->getType()->isReferenceType()) 12557 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12558 AlignmentSource::Decl); 12559 else 12560 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12561 AlignmentSource::Decl); 12562 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12563 D.getBeginLoc()); 12564 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12565 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12566 // } 12567 } 12568 } 12569 12570 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12571 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12572 SourceLocation Loc) { 12573 if (CGF.getLangOpts().OpenMP < 50) 12574 return; 12575 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12576 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12577 "Unknown lastprivate conditional variable."); 12578 StringRef UniqueName = It->second; 12579 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12580 // The variable was not updated in the region - exit. 
if (!GV)
    return;
  // Load the value from the global (typed as the non-reference type of the
  // private lvalue) and store it into the private lvalue.
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime entry points.
//
// Unless noted otherwise, each definition below consists solely of
// llvm_unreachable("Not supported in SIMD-only mode"): the entry point is not
// expected to be called on this runtime class.
//===----------------------------------------------------------------------===//

/// Not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
// NOTE(review): the region parameter is named MasterOpGen although this is
// the 'masked' entry point; looks like a copy-paste leftover.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Unlike the surrounding entry points this one is implemented: it asserts
/// that \p Options requests a simple reduction and then forwards all
/// arguments to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

/// Not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12784 12785 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12786 SourceLocation Loc, 12787 ReductionCodeGen &RCG, 12788 unsigned N) { 12789 llvm_unreachable("Not supported in SIMD-only mode"); 12790 } 12791 12792 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12793 SourceLocation Loc, 12794 llvm::Value *ReductionsPtr, 12795 LValue SharedLVal) { 12796 llvm_unreachable("Not supported in SIMD-only mode"); 12797 } 12798 12799 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12800 SourceLocation Loc) { 12801 llvm_unreachable("Not supported in SIMD-only mode"); 12802 } 12803 12804 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12805 CodeGenFunction &CGF, SourceLocation Loc, 12806 OpenMPDirectiveKind CancelRegion) { 12807 llvm_unreachable("Not supported in SIMD-only mode"); 12808 } 12809 12810 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12811 SourceLocation Loc, const Expr *IfCond, 12812 OpenMPDirectiveKind CancelRegion) { 12813 llvm_unreachable("Not supported in SIMD-only mode"); 12814 } 12815 12816 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12817 const OMPExecutableDirective &D, StringRef ParentName, 12818 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12819 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12820 llvm_unreachable("Not supported in SIMD-only mode"); 12821 } 12822 12823 void CGOpenMPSIMDRuntime::emitTargetCall( 12824 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12825 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12826 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12827 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12828 const OMPLoopDirective &D)> 12829 SizeEmitter) { 12830 llvm_unreachable("Not supported in SIMD-only mode"); 12831 } 12832 12833 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12834 llvm_unreachable("Not supported in SIMD-only 
mode"); 12835 } 12836 12837 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12838 llvm_unreachable("Not supported in SIMD-only mode"); 12839 } 12840 12841 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12842 return false; 12843 } 12844 12845 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12846 const OMPExecutableDirective &D, 12847 SourceLocation Loc, 12848 llvm::Function *OutlinedFn, 12849 ArrayRef<llvm::Value *> CapturedVars) { 12850 llvm_unreachable("Not supported in SIMD-only mode"); 12851 } 12852 12853 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12854 const Expr *NumTeams, 12855 const Expr *ThreadLimit, 12856 SourceLocation Loc) { 12857 llvm_unreachable("Not supported in SIMD-only mode"); 12858 } 12859 12860 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12861 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12862 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12863 llvm_unreachable("Not supported in SIMD-only mode"); 12864 } 12865 12866 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12867 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12868 const Expr *Device) { 12869 llvm_unreachable("Not supported in SIMD-only mode"); 12870 } 12871 12872 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12873 const OMPLoopDirective &D, 12874 ArrayRef<Expr *> NumIterations) { 12875 llvm_unreachable("Not supported in SIMD-only mode"); 12876 } 12877 12878 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12879 const OMPDependClause *C) { 12880 llvm_unreachable("Not supported in SIMD-only mode"); 12881 } 12882 12883 const VarDecl * 12884 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12885 const VarDecl *NativeParam) const { 12886 llvm_unreachable("Not supported in SIMD-only mode"); 12887 } 12888 12889 Address 12890 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction 
&CGF, 12891 const VarDecl *NativeParam, 12892 const VarDecl *TargetParam) const { 12893 llvm_unreachable("Not supported in SIMD-only mode"); 12894 } 12895