1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 bool NoInheritance = false; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel, 421 bool NoInheritance = true) 422 : CGF(CGF), NoInheritance(NoInheritance) { 423 // Start emission for the construct. 424 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 425 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 426 if (NoInheritance) { 427 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 428 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 429 CGF.LambdaThisCaptureField = nullptr; 430 BlockInfo = CGF.BlockInfo; 431 CGF.BlockInfo = nullptr; 432 } 433 } 434 435 ~InlinedOpenMPRegionRAII() { 436 // Restore original CapturedStmtInfo only if we're done with code emission. 
437 auto *OldCSI = 438 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 439 delete CGF.CapturedStmtInfo; 440 CGF.CapturedStmtInfo = OldCSI; 441 if (NoInheritance) { 442 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 443 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 444 CGF.BlockInfo = BlockInfo; 445 } 446 } 447 }; 448 449 /// Values for bit flags used in the ident_t to describe the fields. 450 /// All enumeric elements are named and described in accordance with the code 451 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 452 enum OpenMPLocationFlags : unsigned { 453 /// Use trampoline for internal microtask. 454 OMP_IDENT_IMD = 0x01, 455 /// Use c-style ident structure. 456 OMP_IDENT_KMPC = 0x02, 457 /// Atomic reduction option for kmpc_reduce. 458 OMP_ATOMIC_REDUCE = 0x10, 459 /// Explicit 'barrier' directive. 460 OMP_IDENT_BARRIER_EXPL = 0x20, 461 /// Implicit barrier in code. 462 OMP_IDENT_BARRIER_IMPL = 0x40, 463 /// Implicit barrier in 'for' directive. 464 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 465 /// Implicit barrier in 'sections' directive. 466 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 467 /// Implicit barrier in 'single' directive. 468 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 469 /// Call of __kmp_for_static_init for static loop. 470 OMP_IDENT_WORK_LOOP = 0x200, 471 /// Call of __kmp_for_static_init for sections. 472 OMP_IDENT_WORK_SECTIONS = 0x400, 473 /// Call of __kmp_for_static_init for distribute. 474 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 475 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 476 }; 477 478 namespace { 479 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 480 /// Values for bit flags for marking which requires clauses have been used. 481 enum OpenMPOffloadingRequiresDirFlags : int64_t { 482 /// flag undefined. 483 OMP_REQ_UNDEFINED = 0x000, 484 /// no requires clause present. 485 OMP_REQ_NONE = 0x001, 486 /// reverse_offload clause. 
487 OMP_REQ_REVERSE_OFFLOAD = 0x002, 488 /// unified_address clause. 489 OMP_REQ_UNIFIED_ADDRESS = 0x004, 490 /// unified_shared_memory clause. 491 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 492 /// dynamic_allocators clause. 493 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 494 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 495 }; 496 497 enum OpenMPOffloadingReservedDeviceIDs { 498 /// Device ID if the device was not defined, runtime should get it 499 /// from environment variables in the spec. 500 OMP_DEVICEID_UNDEF = -1, 501 }; 502 } // anonymous namespace 503 504 /// Describes ident structure that describes a source location. 505 /// All descriptions are taken from 506 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h 507 /// Original structure: 508 /// typedef struct ident { 509 /// kmp_int32 reserved_1; /**< might be used in Fortran; 510 /// see above */ 511 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 512 /// KMP_IDENT_KMPC identifies this union 513 /// member */ 514 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 515 /// see above */ 516 ///#if USE_ITT_BUILD 517 /// /* but currently used for storing 518 /// region-specific ITT */ 519 /// /* contextual information. */ 520 ///#endif /* USE_ITT_BUILD */ 521 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 522 /// C++ */ 523 /// char const *psource; /**< String describing the source location. 524 /// The string is composed of semi-colon separated 525 // fields which describe the source file, 526 /// the function and a pair of line numbers that 527 /// delimit the construct. 528 /// */ 529 /// } ident_t; 530 enum IdentFieldIndex { 531 /// might be used in Fortran 532 IdentField_Reserved_1, 533 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
534 IdentField_Flags, 535 /// Not really used in Fortran any more 536 IdentField_Reserved_2, 537 /// Source[4] in Fortran, do not use for C++ 538 IdentField_Reserved_3, 539 /// String describing the source location. The string is composed of 540 /// semi-colon separated fields which describe the source file, the function 541 /// and a pair of line numbers that delimit the construct. 542 IdentField_PSource 543 }; 544 545 /// Schedule types for 'omp for' loops (these enumerators are taken from 546 /// the enum sched_type in kmp.h). 547 enum OpenMPSchedType { 548 /// Lower bound for default (unordered) versions. 549 OMP_sch_lower = 32, 550 OMP_sch_static_chunked = 33, 551 OMP_sch_static = 34, 552 OMP_sch_dynamic_chunked = 35, 553 OMP_sch_guided_chunked = 36, 554 OMP_sch_runtime = 37, 555 OMP_sch_auto = 38, 556 /// static with chunk adjustment (e.g., simd) 557 OMP_sch_static_balanced_chunked = 45, 558 /// Lower bound for 'ordered' versions. 559 OMP_ord_lower = 64, 560 OMP_ord_static_chunked = 65, 561 OMP_ord_static = 66, 562 OMP_ord_dynamic_chunked = 67, 563 OMP_ord_guided_chunked = 68, 564 OMP_ord_runtime = 69, 565 OMP_ord_auto = 70, 566 OMP_sch_default = OMP_sch_static, 567 /// dist_schedule types 568 OMP_dist_sch_static_chunked = 91, 569 OMP_dist_sch_static = 92, 570 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 571 /// Set if the monotonic schedule modifier was present. 572 OMP_sch_modifier_monotonic = (1 << 29), 573 /// Set if the nonmonotonic schedule modifier was present. 574 OMP_sch_modifier_nonmonotonic = (1 << 30), 575 }; 576 577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 578 /// region. 
579 class CleanupTy final : public EHScopeStack::Cleanup { 580 PrePostActionTy *Action; 581 582 public: 583 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 584 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 585 if (!CGF.HaveInsertPoint()) 586 return; 587 Action->Exit(CGF); 588 } 589 }; 590 591 } // anonymous namespace 592 593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 594 CodeGenFunction::RunCleanupsScope Scope(CGF); 595 if (PrePostAction) { 596 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 597 Callback(CodeGen, CGF, *PrePostAction); 598 } else { 599 PrePostActionTy Action; 600 Callback(CodeGen, CGF, Action); 601 } 602 } 603 604 /// Check if the combiner is a call to UDR combiner and if it is so return the 605 /// UDR decl used for reduction. 606 static const OMPDeclareReductionDecl * 607 getReductionInit(const Expr *ReductionOp) { 608 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 609 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 610 if (const auto *DRE = 611 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 612 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 613 return DRD; 614 return nullptr; 615 } 616 617 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 618 const OMPDeclareReductionDecl *DRD, 619 const Expr *InitOp, 620 Address Private, Address Original, 621 QualType Ty) { 622 if (DRD->getInitializer()) { 623 std::pair<llvm::Function *, llvm::Function *> Reduction = 624 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 625 const auto *CE = cast<CallExpr>(InitOp); 626 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 627 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 628 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 629 const auto *LHSDRE = 630 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 631 const auto *RHSDRE = 632 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 633 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 634 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 635 [=]() { return Private; }); 636 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 637 [=]() { return Original; }); 638 (void)PrivateScope.Privatize(); 639 RValue Func = RValue::get(Reduction.second); 640 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 641 CGF.EmitIgnoredExpr(InitOp); 642 } else { 643 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 644 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 645 auto *GV = new llvm::GlobalVariable( 646 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 647 llvm::GlobalValue::PrivateLinkage, Init, Name); 648 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 649 RValue InitRVal; 650 switch (CGF.getEvaluationKind(Ty)) { 651 case TEK_Scalar: 652 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 653 break; 654 case TEK_Complex: 655 InitRVal = 656 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 657 break; 658 case TEK_Aggregate: 659 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 660 break; 661 } 662 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 663 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 664 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 665 /*IsInitializer=*/false); 666 } 667 } 668 669 /// Emit initialization of arrays of complex types. 670 /// \param DestAddr Address of the array. 671 /// \param Type Type of array. 672 /// \param Init Initial expression of array. 673 /// \param SrcAddr Address of the original array. 674 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 675 QualType Type, bool EmitDeclareReductionInit, 676 const Expr *Init, 677 const OMPDeclareReductionDecl *DRD, 678 Address SrcAddr = Address::invalid()) { 679 // Perform element-by-element initialization. 
680 QualType ElementTy; 681 682 // Drill down to the base element type on both arrays. 683 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 684 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 685 DestAddr = 686 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 687 if (DRD) 688 SrcAddr = 689 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 690 691 llvm::Value *SrcBegin = nullptr; 692 if (DRD) 693 SrcBegin = SrcAddr.getPointer(); 694 llvm::Value *DestBegin = DestAddr.getPointer(); 695 // Cast from pointer to array type to pointer to single element. 696 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 697 // The basic structure here is a while-do loop. 698 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 699 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 700 llvm::Value *IsEmpty = 701 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 702 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 703 704 // Enter the loop body, making that address the current address. 
705 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 706 CGF.EmitBlock(BodyBB); 707 708 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 709 710 llvm::PHINode *SrcElementPHI = nullptr; 711 Address SrcElementCurrent = Address::invalid(); 712 if (DRD) { 713 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 714 "omp.arraycpy.srcElementPast"); 715 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 716 SrcElementCurrent = 717 Address(SrcElementPHI, 718 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 719 } 720 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 721 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 722 DestElementPHI->addIncoming(DestBegin, EntryBB); 723 Address DestElementCurrent = 724 Address(DestElementPHI, 725 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 726 727 // Emit copy. 728 { 729 CodeGenFunction::RunCleanupsScope InitScope(CGF); 730 if (EmitDeclareReductionInit) { 731 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 732 SrcElementCurrent, ElementTy); 733 } else 734 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 735 /*IsInitializer=*/false); 736 } 737 738 if (DRD) { 739 // Shift the address forward by one element. 740 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 741 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 743 } 744 745 // Shift the address forward by one element. 746 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 747 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 748 // Check whether we've reached the end. 749 llvm::Value *Done = 750 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 751 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 752 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 753 754 // Done. 
755 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 756 } 757 758 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 759 return CGF.EmitOMPSharedLValue(E); 760 } 761 762 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 763 const Expr *E) { 764 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 765 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 766 return LValue(); 767 } 768 769 void ReductionCodeGen::emitAggregateInitialization( 770 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 771 const OMPDeclareReductionDecl *DRD) { 772 // Emit VarDecl with copy init for arrays. 773 // Get the address of the original variable captured in current 774 // captured region. 775 const auto *PrivateVD = 776 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 777 bool EmitDeclareReductionInit = 778 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 779 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 780 EmitDeclareReductionInit, 781 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 782 : PrivateVD->getInit(), 783 DRD, SharedLVal.getAddress(CGF)); 784 } 785 786 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 787 ArrayRef<const Expr *> Origs, 788 ArrayRef<const Expr *> Privates, 789 ArrayRef<const Expr *> ReductionOps) { 790 ClausesData.reserve(Shareds.size()); 791 SharedAddresses.reserve(Shareds.size()); 792 Sizes.reserve(Shareds.size()); 793 BaseDecls.reserve(Shareds.size()); 794 const auto *IOrig = Origs.begin(); 795 const auto *IPriv = Privates.begin(); 796 const auto *IRed = ReductionOps.begin(); 797 for (const Expr *Ref : Shareds) { 798 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 799 std::advance(IOrig, 1); 800 std::advance(IPriv, 1); 801 std::advance(IRed, 1); 802 } 803 } 804 805 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 806 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 807 "Number of generated lvalues must be exactly N."); 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 810 SharedAddresses.emplace_back(First, Second); 811 if (ClausesData[N].Shared == ClausesData[N].Ref) { 812 OrigAddresses.emplace_back(First, Second); 813 } else { 814 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 816 OrigAddresses.emplace_back(First, Second); 817 } 818 } 819 820 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 821 const auto *PrivateVD = 822 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 823 QualType PrivateType = PrivateVD->getType(); 824 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 825 if (!PrivateType->isVariablyModifiedType()) { 826 Sizes.emplace_back( 827 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 828 nullptr); 829 return; 830 } 831 llvm::Value *Size; 832 llvm::Value 
*SizeInChars; 833 auto *ElemType = 834 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 835 ->getElementType(); 836 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 837 if (AsArraySection) { 838 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 839 OrigAddresses[N].first.getPointer(CGF)); 840 Size = CGF.Builder.CreateNUWAdd( 841 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 842 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 843 } else { 844 SizeInChars = 845 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 846 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 847 } 848 Sizes.emplace_back(SizeInChars, Size); 849 CodeGenFunction::OpaqueValueMapping OpaqueMap( 850 CGF, 851 cast<OpaqueValueExpr>( 852 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 853 RValue::get(Size)); 854 CGF.EmitVariablyModifiedType(PrivateType); 855 } 856 857 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 858 llvm::Value *Size) { 859 const auto *PrivateVD = 860 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 861 QualType PrivateType = PrivateVD->getType(); 862 if (!PrivateType->isVariablyModifiedType()) { 863 assert(!Size && !Sizes[N].second && 864 "Size should be nullptr for non-variably modified reduction " 865 "items."); 866 return; 867 } 868 CodeGenFunction::OpaqueValueMapping OpaqueMap( 869 CGF, 870 cast<OpaqueValueExpr>( 871 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 872 RValue::get(Size)); 873 CGF.EmitVariablyModifiedType(PrivateType); 874 } 875 876 void ReductionCodeGen::emitInitialization( 877 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 878 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 879 assert(SharedAddresses.size() > N && "No variable was generated"); 880 const auto *PrivateVD = 881 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 882 const OMPDeclareReductionDecl *DRD = 883 getReductionInit(ClausesData[N].ReductionOp); 884 QualType PrivateType = PrivateVD->getType(); 885 PrivateAddr = CGF.Builder.CreateElementBitCast( 886 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 887 QualType SharedType = SharedAddresses[N].first.getType(); 888 SharedLVal = CGF.MakeAddrLValue( 889 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 890 CGF.ConvertTypeForMem(SharedType)), 891 SharedType, SharedAddresses[N].first.getBaseInfo(), 892 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 893 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 894 if (DRD && DRD->getInitializer()) 895 (void)DefaultInit(CGF); 896 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 897 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 898 (void)DefaultInit(CGF); 899 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 900 PrivateAddr, SharedLVal.getAddress(CGF), 901 SharedLVal.getType()); 902 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 903 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 904 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 905 PrivateVD->getType().getQualifiers(), 906 /*IsInitializer=*/false); 907 } 908 } 909 910 bool ReductionCodeGen::needCleanups(unsigned N) { 911 const auto *PrivateVD = 912 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 913 QualType PrivateType = PrivateVD->getType(); 914 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 915 return DTorKind != QualType::DK_none; 916 } 917 918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 919 Address PrivateAddr) { 920 const auto *PrivateVD = 921 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 922 QualType PrivateType = PrivateVD->getType(); 923 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 924 if (needCleanups(N)) { 925 PrivateAddr = CGF.Builder.CreateElementBitCast( 926 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 927 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 928 } 929 } 930 931 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 932 LValue BaseLV) { 933 BaseTy = BaseTy.getNonReferenceType(); 934 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 935 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 936 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 937 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 938 } else { 939 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 940 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 941 } 942 BaseTy = BaseTy->getPointeeType(); 943 } 944 return CGF.MakeAddrLValue( 945 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 946 CGF.ConvertTypeForMem(ElTy)), 947 BaseLV.getType(), BaseLV.getBaseInfo(), 948 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 949 } 950 951 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 952 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 953 llvm::Value *Addr) { 954 Address Tmp = Address::invalid(); 955 Address TopTmp = Address::invalid(); 956 Address MostTopTmp = Address::invalid(); 957 BaseTy = BaseTy.getNonReferenceType(); 958 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 959 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 960 Tmp = CGF.CreateMemTemp(BaseTy); 961 if (TopTmp.isValid()) 962 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 963 else 964 MostTopTmp = Tmp; 965 TopTmp = Tmp; 966 BaseTy = BaseTy->getPointeeType(); 967 } 968 llvm::Type *Ty = BaseLVType; 969 if (Tmp.isValid()) 970 Ty = Tmp.getElementType(); 971 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 972 if (Tmp.isValid()) { 973 CGF.Builder.CreateStore(Addr, Tmp); 974 return MostTopTmp; 975 } 976 return 
Address(Addr, BaseLVAlignment); 977 } 978 979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 980 const VarDecl *OrigVD = nullptr; 981 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 982 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 983 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 984 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 990 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 991 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 992 Base = TempASE->getBase()->IgnoreParenImpCasts(); 993 DE = cast<DeclRefExpr>(Base); 994 OrigVD = cast<VarDecl>(DE->getDecl()); 995 } 996 return OrigVD; 997 } 998 999 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1000 Address PrivateAddr) { 1001 const DeclRefExpr *DE; 1002 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1003 BaseDecls.emplace_back(OrigVD); 1004 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1005 LValue BaseLValue = 1006 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1007 OriginalBaseLValue); 1008 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1009 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1010 llvm::Value *PrivatePointer = 1011 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1012 PrivateAddr.getPointer(), 1013 SharedAddresses[N].first.getAddress(CGF).getType()); 1014 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1015 return castToBase(CGF, OrigVD->getType(), 1016 SharedAddresses[N].first.getType(), 1017 OriginalBaseLValue.getAddress(CGF).getType(), 1018 OriginalBaseLValue.getAlignment(), Ptr); 1019 } 1020 
// Remainder of ReductionCodeGen::adjustPrivateAddress: the item is a plain
// variable reference, so its declaration is recorded and the private address
// is returned unchanged.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Returns true if reduction item \p N uses a user-defined reduction
/// declaration that provides an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Returns the lvalue obtained by loading through the thread id variable,
/// which has pointer type for this kind of region.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emits the region body by running the stored code generator between
/// EHStack.pushTerminate() and EHStack.popTerminate(). Does nothing when there
/// is no insertion point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Returns an lvalue for the thread id variable itself (no load through a
/// pointer), using the declaration as alignment source.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Creates an unnamed, public, non-mutable field of type \p FieldTy (no
/// bit-width, no in-class initializer), adds it to \p DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Sets up the runtime for module \p CGM. \p FirstSeparator and \p Separator
/// are the separators getName() uses when composing names.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

/// Drops the internal variable table and erases unused declarations of
/// non-target variables.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
// Remainder of CGOpenMPRuntime::clear(): erase every still-alive global
// variable that is only a declaration and has no uses.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Concatenates \p Parts into a single name: the first part is preceded by
/// FirstSeparator and every following part by Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emits an internal helper function for a user-defined reduction: the
/// combiner when \p IsCombiner is true, the initializer otherwise.
///
/// The function takes two restrict-qualified pointers to \p Ty. Inside it,
/// \p In and \p Out are privatized to the pointees of the second and first
/// parameter respectively. For an initializer, a non-trivial init of \p Out is
/// emitted first; then \p CombinerInitializer is emitted if it is non-null.
/// With optimization enabled the function is marked always-inline.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in); the 'out' parameter comes first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializers only: emit the declared init of the 'out' variable when it
  // is not trivial.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner and, if \p D has an initializer, the initializer
/// function for the user-defined reduction \p D and caches both in UDRMap.
/// Does nothing if \p D is already cached. When \p CGF is non-null, \p D is
/// also recorded in FunctionUDRMap under the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // The initializer expression is only passed along for call-style
    // initializers; otherwise the helper gets no expression to emit.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Returns the (combiner, initializer) function pair for \p D, emitting the
/// functions first if they are not cached yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
1217 // The FiniCB will still be needed but at the point where the 1218 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1219 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1220 assert(IP.getBlock()->end() == IP.getPoint() && 1221 "Clang CG should cause non-terminated block!"); 1222 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1223 CGF.Builder.restoreIP(IP); 1224 CodeGenFunction::JumpDest Dest = 1225 CGF.getOMPCancelDestination(OMPD_parallel); 1226 CGF.EmitBranchThroughCleanup(Dest); 1227 }; 1228 1229 // TODO: Remove this once we emit parallel regions through the 1230 // OpenMPIRBuilder as it can do this setup internally. 1231 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1232 {FiniCB, OMPD_parallel, HasCancel}); 1233 OMPBuilder->pushFinalizationCB(std::move(FI)); 1234 } 1235 ~PushAndPopStackRAII() { 1236 if (OMPBuilder) 1237 OMPBuilder->popFinalizationCB(); 1238 } 1239 llvm::OpenMPIRBuilder *OMPBuilder; 1240 }; 1241 } // namespace 1242 1243 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1244 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1245 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1246 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1247 assert(ThreadIDVar->getType()->isPointerType() && 1248 "thread id variable must be of type kmp_int32 *"); 1249 CodeGenFunction CGF(CGM, true); 1250 bool HasCancel = false; 1251 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1252 HasCancel = OPD->hasCancel(); 1253 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1254 HasCancel = OPD->hasCancel(); 1255 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1256 HasCancel = OPSD->hasCancel(); 1257 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1258 HasCancel = OPFD->hasCancel(); 1259 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1260 HasCancel = 
OPFD->hasCancel(); 1261 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1262 HasCancel = OPFD->hasCancel(); 1263 else if (const auto *OPFD = 1264 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1265 HasCancel = OPFD->hasCancel(); 1266 else if (const auto *OPFD = 1267 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1268 HasCancel = OPFD->hasCancel(); 1269 1270 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1271 // parallel region to make cancellation barriers work properly. 1272 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1273 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1274 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1275 HasCancel, OutlinedHelperName); 1276 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1277 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1278 } 1279 1280 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1281 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1282 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1283 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1284 return emitParallelOrTeamsOutlinedFunction( 1285 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1286 } 1287 1288 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1289 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1290 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1291 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1292 return emitParallelOrTeamsOutlinedFunction( 1293 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1294 } 1295 1296 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1297 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1298 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1299 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1300 bool Tied, unsigned &NumberOfParts) { 1301 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1302 PrePostActionTy &) { 1303 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1304 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1305 llvm::Value *TaskArgs[] = { 1306 UpLoc, ThreadID, 1307 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1308 TaskTVar->getType()->castAs<PointerType>()) 1309 .getPointer(CGF)}; 1310 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1311 CGM.getModule(), OMPRTL___kmpc_omp_task), 1312 TaskArgs); 1313 }; 1314 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1315 UntiedCodeGen); 1316 CodeGen.setAction(Action); 1317 assert(!ThreadIDVar->getType()->isPointerType() && 1318 "thread id variable must be of type kmp_int32 for tasks"); 1319 const OpenMPDirectiveKind Region = 1320 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1321 : OMPD_task; 1322 const CapturedStmt *CS = D.getCapturedStmt(Region); 1323 bool HasCancel = false; 1324 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1325 HasCancel = TD->hasCancel(); 1326 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1327 HasCancel = TD->hasCancel(); 1328 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1329 HasCancel = TD->hasCancel(); 1330 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1331 HasCancel = TD->hasCancel(); 1332 1333 CodeGenFunction CGF(CGM, true); 1334 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1335 InnermostKind, HasCancel, Action); 1336 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1337 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1338 if (!Tied) 1339 NumberOfParts = Action.getNumberOfParts(); 1340 return Res; 1341 } 1342 1343 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1344 
const RecordDecl *RD, const CGRecordLayout &RL, 1345 ArrayRef<llvm::Constant *> Data) { 1346 llvm::StructType *StructTy = RL.getLLVMType(); 1347 unsigned PrevIdx = 0; 1348 ConstantInitBuilder CIBuilder(CGM); 1349 auto DI = Data.begin(); 1350 for (const FieldDecl *FD : RD->fields()) { 1351 unsigned Idx = RL.getLLVMFieldNo(FD); 1352 // Fill the alignment. 1353 for (unsigned I = PrevIdx; I < Idx; ++I) 1354 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1355 PrevIdx = Idx + 1; 1356 Fields.add(*DI); 1357 ++DI; 1358 } 1359 } 1360 1361 template <class... As> 1362 static llvm::GlobalVariable * 1363 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1364 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1365 As &&... Args) { 1366 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1367 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1368 ConstantInitBuilder CIBuilder(CGM); 1369 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1370 buildStructValue(Fields, CGM, RD, RL, Data); 1371 return Fields.finishAndCreateGlobal( 1372 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1373 std::forward<As>(Args)...); 1374 } 1375 1376 template <typename T> 1377 static void 1378 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1379 ArrayRef<llvm::Constant *> Data, 1380 T &Parent) { 1381 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1382 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1383 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1384 buildStructValue(Fields, CGM, RD, RL, Data); 1385 Fields.finishAndAddTo(Parent); 1386 } 1387 1388 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1389 bool AtCurrentPoint) { 1390 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1391 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1392 1393 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty); 1394 if (AtCurrentPoint) { 1395 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1396 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1397 } else { 1398 Elem.second.ServiceInsertPt = 1399 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1400 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1401 } 1402 } 1403 1404 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1405 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1406 if (Elem.second.ServiceInsertPt) { 1407 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1408 Elem.second.ServiceInsertPt = nullptr; 1409 Ptr->eraseFromParent(); 1410 } 1411 } 1412 1413 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1414 SourceLocation Loc, 1415 SmallString<128> &Buffer) { 1416 llvm::raw_svector_ostream OS(Buffer); 1417 // Build debug location 1418 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1419 OS << ";" << PLoc.getFilename() << ";"; 1420 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1421 OS << FD->getQualifiedNameAsString(); 1422 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1423 return OS.str(); 1424 } 1425 1426 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1427 SourceLocation Loc, 1428 unsigned Flags) { 1429 llvm::Constant *SrcLocStr; 1430 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1431 Loc.isInvalid()) { 1432 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 1433 } else { 1434 std::string FunctionName = ""; 1435 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1436 FunctionName = FD->getQualifiedNameAsString(); 1437 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1438 const char *FileName = PLoc.getFilename(); 1439 unsigned Line = PLoc.getLine(); 1440 unsigned Column = PLoc.getColumn(); 1441 SrcLocStr = 
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, 1442 Line, Column); 1443 } 1444 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1445 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), 1446 Reserved2Flags); 1447 } 1448 1449 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1450 SourceLocation Loc) { 1451 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1452 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1453 // the clang invariants used below might be broken. 1454 if (CGM.getLangOpts().OpenMPIRBuilder) { 1455 SmallString<128> Buffer; 1456 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1457 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1458 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1459 return OMPBuilder.getOrCreateThreadID( 1460 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1461 } 1462 1463 llvm::Value *ThreadID = nullptr; 1464 // Check whether we've already cached a load of the thread id in this 1465 // function. 1466 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1467 if (I != OpenMPLocThreadIDMap.end()) { 1468 ThreadID = I->second.ThreadID; 1469 if (ThreadID != nullptr) 1470 return ThreadID; 1471 } 1472 // If exceptions are enabled, do not use parameter to avoid possible crash. 1473 if (auto *OMPRegionInfo = 1474 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1475 if (OMPRegionInfo->getThreadIDVariable()) { 1476 // Check if this an outlined function with thread id passed as argument. 
1477 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1478 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1479 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1480 !CGF.getLangOpts().CXXExceptions || 1481 CGF.Builder.GetInsertBlock() == TopBlock || 1482 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1483 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1484 TopBlock || 1485 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1486 CGF.Builder.GetInsertBlock()) { 1487 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1488 // If value loaded in entry block, cache it and use it everywhere in 1489 // function. 1490 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1491 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1492 Elem.second.ThreadID = ThreadID; 1493 } 1494 return ThreadID; 1495 } 1496 } 1497 } 1498 1499 // This is not an outlined function region - need to call __kmpc_int32 1500 // kmpc_global_thread_num(ident_t *loc). 1501 // Generate thread id value and cache this value for use across the 1502 // function. 
1503 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1504 if (!Elem.second.ServiceInsertPt) 1505 setLocThreadIdInsertPt(CGF); 1506 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1507 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1508 llvm::CallInst *Call = CGF.Builder.CreateCall( 1509 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1510 OMPRTL___kmpc_global_thread_num), 1511 emitUpdateLocation(CGF, Loc)); 1512 Call->setCallingConv(CGF.getRuntimeCC()); 1513 Elem.second.ThreadID = Call; 1514 return Call; 1515 } 1516 1517 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1518 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1519 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1520 clearLocThreadIdInsertPt(CGF); 1521 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1522 } 1523 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1524 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1525 UDRMap.erase(D); 1526 FunctionUDRMap.erase(CGF.CurFn); 1527 } 1528 auto I = FunctionUDMMap.find(CGF.CurFn); 1529 if (I != FunctionUDMMap.end()) { 1530 for(const auto *D : I->second) 1531 UDMMap.erase(D); 1532 FunctionUDMMap.erase(I); 1533 } 1534 LastprivateConditionalToTypes.erase(CGF.CurFn); 1535 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1536 } 1537 1538 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1539 return OMPBuilder.IdentPtr; 1540 } 1541 1542 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1543 if (!Kmpc_MicroTy) { 1544 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1545 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1546 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1547 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1548 } 1549 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1550 } 1551 1552 llvm::FunctionCallee 1553 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1554 assert((IVSize == 32 || IVSize == 64) && 1555 "IV size is not compatible with the omp runtime"); 1556 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1557 : "__kmpc_for_static_init_4u") 1558 : (IVSigned ? "__kmpc_for_static_init_8" 1559 : "__kmpc_for_static_init_8u"); 1560 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1561 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1562 llvm::Type *TypeParams[] = { 1563 getIdentTyPointerTy(), // loc 1564 CGM.Int32Ty, // tid 1565 CGM.Int32Ty, // schedtype 1566 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1567 PtrTy, // p_lower 1568 PtrTy, // p_upper 1569 PtrTy, // p_stride 1570 ITy, // incr 1571 ITy // chunk 1572 }; 1573 auto *FnTy = 1574 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1575 return CGM.CreateRuntimeFunction(FnTy, Name); 1576 } 1577 1578 llvm::FunctionCallee 1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1580 assert((IVSize == 32 || IVSize == 64) && 1581 "IV size is not compatible with the omp runtime"); 1582 StringRef Name = 1583 IVSize == 32 1584 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1585 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1586 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the "__kmpc_dispatch_fini_{4,4u,8,8u}" runtime entry point selected
/// by the induction variable width (\p IVSize, 32 or 64) and signedness
/// (\p IVSigned). The function is declared as void(loc, tid).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the "__kmpc_dispatch_next_{4,4u,8,8u}" runtime entry point selected
/// by the induction variable width (\p IVSize, 32 or 64) and signedness
/// (\p IVSigned). The function is declared as
/// i32(loc, tid, i32 *p_lastiter, ITy *p_lower, ITy *p_upper, ITy *p_stride),
/// where ITy is i32 or i64 according to \p IVSize.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
///
/// \param C AST context providing the source manager.
/// \param Loc Source location of the entry; must be valid.
/// \param [out] DeviceID Device component of the file's unique ID.
/// \param [out] FileID File component of the file's unique ID.
/// \param [out] LineNum Presumed line number of \p Loc.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  // First try the presumed file name; if its unique ID cannot be obtained,
  // retry with the presumed location computed with UseLineDirectives=false
  // and report a diagnostic if that fails too.
  // NOTE(review): the inner 'EC' shadows the outer one, which is otherwise
  // unused; only the second error is reported.
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Return the address of the "<mangled-name>[_<file-id>]_decl_tgt_ref_ptr"
/// reference pointer for a declare target variable \p VD that is mapped with
/// 'link', or with 'to' when HasRequiresUnifiedSharedMemory is set. The pointer
/// variable is created (and registered) on first use. Returns an invalid
/// address in OpenMPSimd mode or for any other kind of variable.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name of the reference pointer. Variables that are not
    // externally visible additionally get the file ID of their declaration
    // appended to the mangled name.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Not in the module yet: create a weak pointer-to-VD global and register
      // it as a target global variable.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only when not compiling for the device is the pointer initialized
      // with the address of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal "<mangled-name><cache-suffix>" variable (of type
/// CGM.Int8PtrPtrTy) that is passed as the cache argument to
/// __kmpc_threadprivate_cached for \p VD. Must not be used when threadprivate
/// variables are implemented through supported TLS.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Return the address of the current thread's copy of the threadprivate
/// variable \p VD. When TLS is requested and supported, \p VDAddr is returned
/// unchanged; otherwise a call to __kmpc_threadprivate_cached(loc, tid, &var,
/// size, cache) is emitted and its result is returned with the alignment of
/// \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Emit, at the current insertion point of \p CGF, the runtime calls that
/// register the constructor, copy constructor and destructor helpers of the
/// threadprivate variable located at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and register them with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is requested and
/// supported, when \p VD has no definition, when the definition was already
/// processed, or when neither a constructor nor a destructor helper is needed.
///
/// \param PerformInit Whether a constructor helper re-emitting the initializer
/// should be generated (only honoured for C++).
/// \param CGF If non-null, the registration calls are emitted into this
/// function and nullptr is returned; if null, a new
/// "__omp_threadprivate_init_" function containing the registration is created
/// and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The set insertion ensures each definition is handled only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the void* argument and view it as a pointer to the variable type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Substitute typed null function pointers for whichever helper was not
    // generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration in a
      // standalone nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit and register the offload constructor/destructor entries for the
/// declare target variable \p VD whose storage is \p Addr.
///
/// Returns false when no target triples are set and this is not a device
/// compilation; in every other case the return value is
/// CGM.getLangOpts().OpenMPIsDevice. Variables that are not declare target,
/// that are mapped with 'link' (or 'to' under unified shared memory), or that
/// were already processed produce no entries.
///
/// \param PerformInit Whether a constructor entry should be produced (only
/// honoured for C++).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each definition only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the declare target variable VD (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: emit a private constant i8 placeholder
      // global with the same name, used as both the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the storage of the
      // declare target variable VD (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: placeholder global, as for the constructor.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of the current thread's copy of a compiler-generated
/// ("artificial") threadprivate variable of type \p VarType named
/// "<Name><artificial-suffix>". With OpenMP TLS enabled and supported, the
/// internal global itself is marked thread-local and returned; otherwise the
/// address is obtained through a call to __kmpc_threadprivate_cached, using a
/// second internal variable ("...<cache-suffix>") as the cache.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Arguments: loc, tid, &var, size of the variable, cache.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned pointer back to a pointer to the variable type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' clause: \p ThenGen is emitted for the case
/// where \p Cond is true and \p ElseGen for the case where it is false. When
/// the condition constant-folds, only the live arm is emitted and no branch is
/// generated.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded at the end
  // of this statement, whereas other uses in this file keep it in a named
  // local; confirm the empty location still applies to the code that follows.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded temporary as above.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the invocation of the outlined parallel region \p OutlinedFn with the
/// captured variables \p CapturedVars. Without an 'if' clause (\p IfCond is
/// null) a __kmpc_fork_call is emitted; with one, emitIfClause selects between
/// the fork call and a serialized invocation bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Prefer the thread-ID variable of the enclosing OpenMP region, if any.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise store the thread ID into a fresh signed 32-bit temporary and
  // return the address of that temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the internal global variable named \p Name, creating it on first
/// request. A new variable is non-constant, has common linkage, is
/// zero-initialized, is not thread-local and lives in \p AddressSpace. Created
/// variables are remembered in InternalVars; asking again for an existing name
/// with a different \p Ty trips the assertion.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into a buffer to obtain a StringRef usable as map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // The global is named with the map entry's own key (Elem.first()).
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the lock variable (of type KmpCriticalNameTy) for the critical
/// region named \p CriticalName. Its name is built by getName from the prefix
/// "gomp_critical_user_<CriticalName>" and the part "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the enter callee and Exit() a call to the exit
/// callee. When \c Conditional is set, Enter() additionally branches on the
/// enter call's result so that the code emitted afterwards runs only if the
/// result is non-null; Done() must then be called to close that conditional
/// block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  // NOTE(review): the argument lists are kept as ArrayRef, presumably
  // non-owning views, so the arrays passed to the constructor would have to
  // outlive this action - confirm.
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only set by Enter() when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): uses ContBlock unconditionally, so this is only meaningful
  // after Enter() ran with Conditional set; the callers visible in this file
  // (master and single regions) construct the action with Conditional=true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region named \p CriticalName whose body is produced by
/// \p CriticalOpGen. When \p Hint is non-null the region is entered through
/// __kmpc_critical_with_hint with the hint value cast to an unsigned 32-bit
/// integer; otherwise through __kmpc_critical.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments as the exit call plus, if
  // present, the hint.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region whose body is produced by \p MasterOpGen. The body
/// and the __kmpc_end_master call are guarded by the result of __kmpc_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by Action.Enter().
  Action.Done(CGF);
}

/// Emit code for a 'taskyield' directive, either through the OpenMPIRBuilder
/// (when the OpenMPIRBuilder language option is set) or as a direct call to
/// __kmpc_omp_taskyield. Afterwards, if inside an OpenMP region, the region's
/// untied-task switch is emitted.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region whose body is produced by \p TaskgroupOpGen,
/// bracketed by __kmpc_taskgroup and __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \p Index of \p Array and returns it as an address with the
/// declared alignment of \p Var and the memory type of \p Var as element type.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the internal helper "void copy_func(void *LHSArg, void *RHSArg)" used
/// for 'copyprivate'. Both arguments are cast to \p ArgsType and treated as
/// arrays of pointers; for every index I the helper performs the copy described
/// by AssignmentOps[I] from element I of the RHS array to element I of the LHS
/// array.
///
/// \returns The created function (internal linkage, marked as non-recursive).
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region whose body is produced by \p SingleOpGen. The body
/// and __kmpc_end_single are guarded by the result of __kmpc_single. When
/// \p CopyprivateVars is non-empty, a "did_it" flag records whether this thread
/// executed the region and a __kmpc_copyprivate call is emitted afterwards,
/// passing an array with the addresses of the copyprivate variables and a
/// generated copy function.
///
/// \p CopyprivateVars, \p SrcExprs, \p DstExprs and \p AssignmentOps must all
/// have the same number of elements.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // This store is emitted before Action.Done(), i.e. still inside the
    // conditional block entered by the thread executing the region.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed for the helper's 'DestExprs' parameter
    // and DstExprs for its 'SrcExprs' parameter; confirm whether the parameter
    // names of this function match its declaration and its callers.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region whose body is produced by \p OrderedOpGen. Only
/// when \p IsThreads is true is the body bracketed by __kmpc_ordered and
/// __kmpc_end_ordered; otherwise the body is emitted inline without runtime
/// calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map the directive \p Kind that requires a barrier to the OMP_IDENT_BARRIER_*
/// flag used for that barrier: dedicated implicit-barrier flags for 'for',
/// 'sections' and 'single', the explicit flag for 'barrier', and the generic
/// implicit flag for everything else.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for the loop directive \p S. \p ScheduleKind and
/// \p ChunkExpr are only modified when \p S has an 'ordered' clause with a
/// non-zero number of loops; they are then set to 'static' and to an unsigned
/// 32-bit integer literal 1, respectively.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value
*Result = CGF.EmitRuntimeCall( 2553 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2554 OMPRTL___kmpc_cancel_barrier), 2555 Args); 2556 if (EmitChecks) { 2557 // if (__kmpc_cancel_barrier()) { 2558 // exit from construct; 2559 // } 2560 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2561 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2562 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2563 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2564 CGF.EmitBlock(ExitBB); 2565 // exit from construct; 2566 CodeGenFunction::JumpDest CancelDestination = 2567 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2568 CGF.EmitBranchThroughCleanup(CancelDestination); 2569 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2570 } 2571 return; 2572 } 2573 } 2574 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2575 CGM.getModule(), OMPRTL___kmpc_barrier), 2576 Args); 2577 } 2578 2579 /// Map the OpenMP loop schedule to the runtime enumeration. 2580 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2581 bool Chunked, bool Ordered) { 2582 switch (ScheduleKind) { 2583 case OMPC_SCHEDULE_static: 2584 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2585 : (Ordered ? OMP_ord_static : OMP_sch_static); 2586 case OMPC_SCHEDULE_dynamic: 2587 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2588 case OMPC_SCHEDULE_guided: 2589 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2590 case OMPC_SCHEDULE_runtime: 2591 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2592 case OMPC_SCHEDULE_auto: 2593 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2594 case OMPC_SCHEDULE_unknown: 2595 assert(!Chunked && "chunk was specified but schedule kind not known"); 2596 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2597 } 2598 llvm_unreachable("Unexpected runtime schedule"); 2599 } 2600 2601 /// Map the OpenMP distribute schedule to the runtime enumeration. 2602 static OpenMPSchedType 2603 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2604 // only static is allowed for dist_schedule 2605 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2606 } 2607 2608 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2609 bool Chunked) const { 2610 OpenMPSchedType Schedule = 2611 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2612 return Schedule == OMP_sch_static; 2613 } 2614 2615 bool CGOpenMPRuntime::isStaticNonchunked( 2616 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2617 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2618 return Schedule == OMP_dist_sch_static; 2619 } 2620 2621 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2622 bool Chunked) const { 2623 OpenMPSchedType Schedule = 2624 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2625 return Schedule == OMP_sch_static_chunked; 2626 } 2627 2628 bool CGOpenMPRuntime::isStaticChunked( 2629 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2630 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2631 return Schedule == OMP_dist_sch_static_chunked; 2632 } 2633 2634 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2635 OpenMPSchedType Schedule = 2636 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2637 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2638 return Schedule != OMP_sch_static; 2639 } 2640 2641 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2642 OpenMPScheduleClauseModifier M1, 2643 OpenMPScheduleClauseModifier M2) { 2644 int Modifier = 0; 2645 switch (M1) { 2646 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2647 Modifier = OMP_sch_modifier_monotonic; 2648 break; 2649 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2650 Modifier = OMP_sch_modifier_nonmonotonic; 2651 break; 2652 case OMPC_SCHEDULE_MODIFIER_simd: 2653 if (Schedule == OMP_sch_static_chunked) 2654 Schedule = OMP_sch_static_balanced_chunked; 2655 break; 2656 case OMPC_SCHEDULE_MODIFIER_last: 2657 case OMPC_SCHEDULE_MODIFIER_unknown: 2658 break; 2659 } 2660 switch (M2) { 2661 case OMPC_SCHEDULE_MODIFIER_monotonic: 2662 Modifier = OMP_sch_modifier_monotonic; 2663 break; 2664 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2665 Modifier = OMP_sch_modifier_nonmonotonic; 2666 break; 2667 case OMPC_SCHEDULE_MODIFIER_simd: 2668 if (Schedule == OMP_sch_static_chunked) 2669 Schedule = OMP_sch_static_balanced_chunked; 2670 break; 2671 case OMPC_SCHEDULE_MODIFIER_last: 2672 case OMPC_SCHEDULE_MODIFIER_unknown: 2673 break; 2674 } 2675 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2676 // If the static schedule kind is specified or if the ordered clause is 2677 // specified, and if the nonmonotonic modifier is not specified, the effect is 2678 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2679 // modifier is specified, the effect is as if the nonmonotonic modifier is 2680 // specified. 
2681 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2682 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2683 Schedule == OMP_sch_static_balanced_chunked || 2684 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2685 Schedule == OMP_dist_sch_static_chunked || 2686 Schedule == OMP_dist_sch_static)) 2687 Modifier = OMP_sch_modifier_nonmonotonic; 2688 } 2689 return Schedule | Modifier; 2690 } 2691 2692 void CGOpenMPRuntime::emitForDispatchInit( 2693 CodeGenFunction &CGF, SourceLocation Loc, 2694 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2695 bool Ordered, const DispatchRTInput &DispatchValues) { 2696 if (!CGF.HaveInsertPoint()) 2697 return; 2698 OpenMPSchedType Schedule = getRuntimeSchedule( 2699 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2700 assert(Ordered || 2701 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2702 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2703 Schedule != OMP_sch_static_balanced_chunked)); 2704 // Call __kmpc_dispatch_init( 2705 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2706 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2707 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2708 2709 // If the Chunk was not specified in the clause - use default value 1. 2710 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2711 : CGF.Builder.getIntN(IVSize, 1); 2712 llvm::Value *Args[] = { 2713 emitUpdateLocation(CGF, Loc), 2714 getThreadID(CGF, Loc), 2715 CGF.Builder.getInt32(addMonoNonMonoModifier( 2716 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2717 DispatchValues.LB, // Lower 2718 DispatchValues.UB, // Upper 2719 CGF.Builder.getIntN(IVSize, 1), // Stride 2720 Chunk // Chunk 2721 }; 2722 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2723 } 2724 2725 static void emitForStaticInitCall( 2726 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2727 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2728 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2729 const CGOpenMPRuntime::StaticRTInput &Values) { 2730 if (!CGF.HaveInsertPoint()) 2731 return; 2732 2733 assert(!Values.Ordered); 2734 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2735 Schedule == OMP_sch_static_balanced_chunked || 2736 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2737 Schedule == OMP_dist_sch_static || 2738 Schedule == OMP_dist_sch_static_chunked); 2739 2740 // Call __kmpc_for_static_init( 2741 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2742 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2743 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2744 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2745 llvm::Value *Chunk = Values.Chunk; 2746 if (Chunk == nullptr) { 2747 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2748 Schedule == OMP_dist_sch_static) && 2749 "expected static non-chunked schedule"); 2750 // If the Chunk was not specified in the clause - use default value 1. 
2751 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2752 } else { 2753 assert((Schedule == OMP_sch_static_chunked || 2754 Schedule == OMP_sch_static_balanced_chunked || 2755 Schedule == OMP_ord_static_chunked || 2756 Schedule == OMP_dist_sch_static_chunked) && 2757 "expected static chunked schedule"); 2758 } 2759 llvm::Value *Args[] = { 2760 UpdateLocation, 2761 ThreadId, 2762 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2763 M2)), // Schedule type 2764 Values.IL.getPointer(), // &isLastIter 2765 Values.LB.getPointer(), // &LB 2766 Values.UB.getPointer(), // &UB 2767 Values.ST.getPointer(), // &Stride 2768 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2769 Chunk // Chunk 2770 }; 2771 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2772 } 2773 2774 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2775 SourceLocation Loc, 2776 OpenMPDirectiveKind DKind, 2777 const OpenMPScheduleTy &ScheduleKind, 2778 const StaticRTInput &Values) { 2779 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2780 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2781 assert(isOpenMPWorksharingDirective(DKind) && 2782 "Expected loop-based or sections-based directive."); 2783 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2784 isOpenMPLoopDirective(DKind) 2785 ? 
OMP_IDENT_WORK_LOOP 2786 : OMP_IDENT_WORK_SECTIONS); 2787 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2788 llvm::FunctionCallee StaticInitFunction = 2789 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2790 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2791 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2792 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2793 } 2794 2795 void CGOpenMPRuntime::emitDistributeStaticInit( 2796 CodeGenFunction &CGF, SourceLocation Loc, 2797 OpenMPDistScheduleClauseKind SchedKind, 2798 const CGOpenMPRuntime::StaticRTInput &Values) { 2799 OpenMPSchedType ScheduleNum = 2800 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2801 llvm::Value *UpdatedLocation = 2802 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2803 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2804 llvm::FunctionCallee StaticInitFunction = 2805 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2806 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2807 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2808 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2809 } 2810 2811 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2812 SourceLocation Loc, 2813 OpenMPDirectiveKind DKind) { 2814 if (!CGF.HaveInsertPoint()) 2815 return; 2816 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2817 llvm::Value *Args[] = { 2818 emitUpdateLocation(CGF, Loc, 2819 isOpenMPDistributeDirective(DKind) 2820 ? OMP_IDENT_WORK_DISTRIBUTE 2821 : isOpenMPLoopDirective(DKind) 2822 ? 
OMP_IDENT_WORK_LOOP 2823 : OMP_IDENT_WORK_SECTIONS), 2824 getThreadID(CGF, Loc)}; 2825 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2826 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2827 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2828 Args); 2829 } 2830 2831 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2832 SourceLocation Loc, 2833 unsigned IVSize, 2834 bool IVSigned) { 2835 if (!CGF.HaveInsertPoint()) 2836 return; 2837 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2838 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2839 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2840 } 2841 2842 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2843 SourceLocation Loc, unsigned IVSize, 2844 bool IVSigned, Address IL, 2845 Address LB, Address UB, 2846 Address ST) { 2847 // Call __kmpc_dispatch_next( 2848 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2849 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2850 // kmp_int[32|64] *p_stride); 2851 llvm::Value *Args[] = { 2852 emitUpdateLocation(CGF, Loc), 2853 getThreadID(CGF, Loc), 2854 IL.getPointer(), // &isLastIter 2855 LB.getPointer(), // &Lower 2856 UB.getPointer(), // &Upper 2857 ST.getPointer() // &Stride 2858 }; 2859 llvm::Value *Call = 2860 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2861 return CGF.EmitScalarConversion( 2862 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2863 CGF.getContext().BoolTy, Loc); 2864 } 2865 2866 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2867 llvm::Value *NumThreads, 2868 SourceLocation Loc) { 2869 if (!CGF.HaveInsertPoint()) 2870 return; 2871 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2872 llvm::Value *Args[] = { 2873 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2874 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2875 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2876 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2877 Args); 2878 } 2879 2880 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2881 ProcBindKind ProcBind, 2882 SourceLocation Loc) { 2883 if (!CGF.HaveInsertPoint()) 2884 return; 2885 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2886 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2887 llvm::Value *Args[] = { 2888 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2889 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2890 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2891 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2892 Args); 2893 } 2894 2895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2896 SourceLocation Loc, llvm::AtomicOrdering AO) { 2897 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2898 OMPBuilder.createFlush(CGF.Builder); 2899 } else { 2900 if (!CGF.HaveInsertPoint()) 2901 return; 2902 // Build call void __kmpc_flush(ident_t *loc) 2903 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2904 CGM.getModule(), OMPRTL___kmpc_flush), 2905 emitUpdateLocation(CGF, Loc)); 2906 } 2907 } 2908 2909 namespace { 2910 /// Indexes of fields for type kmp_task_t. 2911 enum KmpTaskTFields { 2912 /// List of shared variables. 2913 KmpTaskTShareds, 2914 /// Task routine. 2915 KmpTaskTRoutine, 2916 /// Partition id for the untied tasks. 2917 KmpTaskTPartId, 2918 /// Function with call of destructors for private variables. 2919 Data1, 2920 /// Task priority. 2921 Data2, 2922 /// (Taskloops only) Lower bound. 2923 KmpTaskTLowerBound, 2924 /// (Taskloops only) Upper bound. 2925 KmpTaskTUpperBound, 2926 /// (Taskloops only) Stride. 2927 KmpTaskTStride, 2928 /// (Taskloops only) Is last iteration flag. 2929 KmpTaskTLastIter, 2930 /// (Taskloops only) Reduction data. 
2931 KmpTaskTReductions, 2932 }; 2933 } // anonymous namespace 2934 2935 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2936 return OffloadEntriesTargetRegion.empty() && 2937 OffloadEntriesDeviceGlobalVar.empty(); 2938 } 2939 2940 /// Initialize target region entry. 2941 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2942 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2943 StringRef ParentName, unsigned LineNum, 2944 unsigned Order) { 2945 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2946 "only required for the device " 2947 "code generation."); 2948 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2949 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2950 OMPTargetRegionEntryTargetRegion); 2951 ++OffloadingEntriesNum; 2952 } 2953 2954 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2955 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2956 StringRef ParentName, unsigned LineNum, 2957 llvm::Constant *Addr, llvm::Constant *ID, 2958 OMPTargetRegionEntryKind Flags) { 2959 // If we are emitting code for a target, the entry is already initialized, 2960 // only has to be registered. 2961 if (CGM.getLangOpts().OpenMPIsDevice) { 2962 // This could happen if the device compilation is invoked standalone. 
2963 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2964 initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2965 OffloadingEntriesNum); 2966 auto &Entry = 2967 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2968 Entry.setAddress(Addr); 2969 Entry.setID(ID); 2970 Entry.setFlags(Flags); 2971 } else { 2972 if (Flags == 2973 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2974 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2975 /*IgnoreAddressId*/ true)) 2976 return; 2977 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2978 "Target region entry already registered!"); 2979 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2980 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2981 ++OffloadingEntriesNum; 2982 } 2983 } 2984 2985 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2986 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2987 bool IgnoreAddressId) const { 2988 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2989 if (PerDevice == OffloadEntriesTargetRegion.end()) 2990 return false; 2991 auto PerFile = PerDevice->second.find(FileID); 2992 if (PerFile == PerDevice->second.end()) 2993 return false; 2994 auto PerParentName = PerFile->second.find(ParentName); 2995 if (PerParentName == PerFile->second.end()) 2996 return false; 2997 auto PerLine = PerParentName->second.find(LineNum); 2998 if (PerLine == PerParentName->second.end()) 2999 return false; 3000 // Fail if this entry is already registered. 
3001 if (!IgnoreAddressId && 3002 (PerLine->second.getAddress() || PerLine->second.getID())) 3003 return false; 3004 return true; 3005 } 3006 3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3008 const OffloadTargetRegionEntryInfoActTy &Action) { 3009 // Scan all target region entries and perform the provided action. 3010 for (const auto &D : OffloadEntriesTargetRegion) 3011 for (const auto &F : D.second) 3012 for (const auto &P : F.second) 3013 for (const auto &L : P.second) 3014 Action(D.first, F.first, P.first(), L.first, L.second); 3015 } 3016 3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3018 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3019 OMPTargetGlobalVarEntryKind Flags, 3020 unsigned Order) { 3021 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3022 "only required for the device " 3023 "code generation."); 3024 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3025 ++OffloadingEntriesNum; 3026 } 3027 3028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3029 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3030 CharUnits VarSize, 3031 OMPTargetGlobalVarEntryKind Flags, 3032 llvm::GlobalValue::LinkageTypes Linkage) { 3033 if (CGM.getLangOpts().OpenMPIsDevice) { 3034 // This could happen if the device compilation is invoked standalone. 
3035 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3036 initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); 3037 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3038 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3039 "Resetting with the new address."); 3040 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3041 if (Entry.getVarSize().isZero()) { 3042 Entry.setVarSize(VarSize); 3043 Entry.setLinkage(Linkage); 3044 } 3045 return; 3046 } 3047 Entry.setVarSize(VarSize); 3048 Entry.setLinkage(Linkage); 3049 Entry.setAddress(Addr); 3050 } else { 3051 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3052 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3053 assert(Entry.isValid() && Entry.getFlags() == Flags && 3054 "Entry not initialized!"); 3055 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3056 "Resetting with the new address."); 3057 if (Entry.getVarSize().isZero()) { 3058 Entry.setVarSize(VarSize); 3059 Entry.setLinkage(Linkage); 3060 } 3061 return; 3062 } 3063 OffloadEntriesDeviceGlobalVar.try_emplace( 3064 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3065 ++OffloadingEntriesNum; 3066 } 3067 } 3068 3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3070 actOnDeviceGlobalVarEntriesInfo( 3071 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3072 // Scan all target region entries and perform the provided action. 3073 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3074 Action(E.getKey(), E.getValue()); 3075 } 3076 3077 void CGOpenMPRuntime::createOffloadEntry( 3078 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3079 llvm::GlobalValue::LinkageTypes Linkage) { 3080 StringRef Name = Addr->getName(); 3081 llvm::Module &M = CGM.getModule(); 3082 llvm::LLVMContext &C = M.getContext(); 3083 3084 // Create constant string with the name. 
3085 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3086 3087 std::string StringName = getName({"omp_offloading", "entry_name"}); 3088 auto *Str = new llvm::GlobalVariable( 3089 M, StrPtrInit->getType(), /*isConstant=*/true, 3090 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3091 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3092 3093 llvm::Constant *Data[] = { 3094 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3095 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3096 llvm::ConstantInt::get(CGM.SizeTy, Size), 3097 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3098 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3099 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3100 llvm::GlobalVariable *Entry = createGlobalStruct( 3101 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3102 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3103 3104 // The entry has to be created in the section the linker expects it to be. 3105 Entry->setSection("omp_offloading_entries"); 3106 } 3107 3108 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3109 // Emit the offloading entries and metadata so that the device codegen side 3110 // can easily figure out what to emit. The produced metadata looks like 3111 // this: 3112 // 3113 // !omp_offload.info = !{!1, ...} 3114 // 3115 // Right now we only generate metadata for function that contain target 3116 // regions. 3117 3118 // If we are in simd mode or there are no entries, we don't need to do 3119 // anything. 
3120 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3121 return; 3122 3123 llvm::Module &M = CGM.getModule(); 3124 llvm::LLVMContext &C = M.getContext(); 3125 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3126 SourceLocation, StringRef>, 3127 16> 3128 OrderedEntries(OffloadEntriesInfoManager.size()); 3129 llvm::SmallVector<StringRef, 16> ParentFunctions( 3130 OffloadEntriesInfoManager.size()); 3131 3132 // Auxiliary methods to create metadata values and strings. 3133 auto &&GetMDInt = [this](unsigned V) { 3134 return llvm::ConstantAsMetadata::get( 3135 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3136 }; 3137 3138 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3139 3140 // Create the offloading info metadata node. 3141 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3142 3143 // Create function that emits metadata for each target region entry; 3144 auto &&TargetRegionMetadataEmitter = 3145 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3146 &GetMDString]( 3147 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3148 unsigned Line, 3149 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3150 // Generate metadata for target regions. Each entry of this metadata 3151 // contains: 3152 // - Entry 0 -> Kind of this type of metadata (0). 3153 // - Entry 1 -> Device ID of the file where the entry was identified. 3154 // - Entry 2 -> File ID of the file where the entry was identified. 3155 // - Entry 3 -> Mangled name of the function where the entry was 3156 // identified. 3157 // - Entry 4 -> Line in the file where the entry was identified. 3158 // - Entry 5 -> Order the entry was created. 3159 // The first element of the metadata node is the kind. 
3160 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3161 GetMDInt(FileID), GetMDString(ParentName), 3162 GetMDInt(Line), GetMDInt(E.getOrder())}; 3163 3164 SourceLocation Loc; 3165 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3166 E = CGM.getContext().getSourceManager().fileinfo_end(); 3167 I != E; ++I) { 3168 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3169 I->getFirst()->getUniqueID().getFile() == FileID) { 3170 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3171 I->getFirst(), Line, 1); 3172 break; 3173 } 3174 } 3175 // Save this entry in the right position of the ordered entries array. 3176 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3177 ParentFunctions[E.getOrder()] = ParentName; 3178 3179 // Add metadata to the named metadata node. 3180 MD->addOperand(llvm::MDNode::get(C, Ops)); 3181 }; 3182 3183 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3184 TargetRegionMetadataEmitter); 3185 3186 // Create function that emits metadata for each device global variable entry; 3187 auto &&DeviceGlobalVarMetadataEmitter = 3188 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3189 MD](StringRef MangledName, 3190 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3191 &E) { 3192 // Generate metadata for global variables. Each entry of this metadata 3193 // contains: 3194 // - Entry 0 -> Kind of this type of metadata (1). 3195 // - Entry 1 -> Mangled name of the variable. 3196 // - Entry 2 -> Declare target kind. 3197 // - Entry 3 -> Order the entry was created. 3198 // The first element of the metadata node is the kind. 3199 llvm::Metadata *Ops[] = { 3200 GetMDInt(E.getKind()), GetMDString(MangledName), 3201 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3202 3203 // Save this entry in the right position of the ordered entries array. 
3204 OrderedEntries[E.getOrder()] = 3205 std::make_tuple(&E, SourceLocation(), MangledName); 3206 3207 // Add metadata to the named metadata node. 3208 MD->addOperand(llvm::MDNode::get(C, Ops)); 3209 }; 3210 3211 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3212 DeviceGlobalVarMetadataEmitter); 3213 3214 for (const auto &E : OrderedEntries) { 3215 assert(std::get<0>(E) && "All ordered entries must exist!"); 3216 if (const auto *CE = 3217 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3218 std::get<0>(E))) { 3219 if (!CE->getID() || !CE->getAddress()) { 3220 // Do not blame the entry if the parent funtion is not emitted. 3221 StringRef FnName = ParentFunctions[CE->getOrder()]; 3222 if (!CGM.GetGlobalValue(FnName)) 3223 continue; 3224 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3225 DiagnosticsEngine::Error, 3226 "Offloading entry for target region in %0 is incorrect: either the " 3227 "address or the ID is invalid."); 3228 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3229 continue; 3230 } 3231 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3232 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3233 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3234 OffloadEntryInfoDeviceGlobalVar>( 3235 std::get<0>(E))) { 3236 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3237 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3238 CE->getFlags()); 3239 switch (Flags) { 3240 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3241 if (CGM.getLangOpts().OpenMPIsDevice && 3242 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3243 continue; 3244 if (!CE->getAddress()) { 3245 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3246 DiagnosticsEngine::Error, "Offloading entry for declare target " 3247 "variable %0 is incorrect: the " 3248 "address is invalid."); 3249 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3250 
continue; 3251 } 3252 // The vaiable has no definition - no need to add the entry. 3253 if (CE->getVarSize().isZero()) 3254 continue; 3255 break; 3256 } 3257 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3258 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3259 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3260 "Declaret target link address is set."); 3261 if (CGM.getLangOpts().OpenMPIsDevice) 3262 continue; 3263 if (!CE->getAddress()) { 3264 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3265 DiagnosticsEngine::Error, 3266 "Offloading entry for declare target variable is incorrect: the " 3267 "address is invalid."); 3268 CGM.getDiags().Report(DiagID); 3269 continue; 3270 } 3271 break; 3272 } 3273 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3274 CE->getVarSize().getQuantity(), Flags, 3275 CE->getLinkage()); 3276 } else { 3277 llvm_unreachable("Unsupported entry kind."); 3278 } 3279 } 3280 } 3281 3282 /// Loads all the offload entries information from the host IR 3283 /// metadata. 3284 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3285 // If we are in target mode, load the metadata from the host IR. This code has 3286 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3287 3288 if (!CGM.getLangOpts().OpenMPIsDevice) 3289 return; 3290 3291 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3292 return; 3293 3294 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3295 if (auto EC = Buf.getError()) { 3296 CGM.getDiags().Report(diag::err_cannot_open_file) 3297 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3298 return; 3299 } 3300 3301 llvm::LLVMContext C; 3302 auto ME = expectedToErrorOrAndEmitErrors( 3303 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3304 3305 if (auto EC = ME.getError()) { 3306 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3307 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3308 CGM.getDiags().Report(DiagID) 3309 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3310 return; 3311 } 3312 3313 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3314 if (!MD) 3315 return; 3316 3317 for (llvm::MDNode *MN : MD->operands()) { 3318 auto &&GetMDInt = [MN](unsigned Idx) { 3319 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3320 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3321 }; 3322 3323 auto &&GetMDString = [MN](unsigned Idx) { 3324 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3325 return V->getString(); 3326 }; 3327 3328 switch (GetMDInt(0)) { 3329 default: 3330 llvm_unreachable("Unexpected metadata!"); 3331 break; 3332 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3333 OffloadingEntryInfoTargetRegion: 3334 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3335 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3336 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3337 /*Order=*/GetMDInt(5)); 3338 break; 3339 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3340 OffloadingEntryInfoDeviceGlobalVar: 3341 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3342 /*MangledName=*/GetMDString(1), 3343 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3344 /*Flags=*/GetMDInt(2)), 3345 /*Order=*/GetMDInt(3)); 3346 break; 3347 } 3348 } 3349 } 3350 3351 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3352 if (!KmpRoutineEntryPtrTy) { 3353 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3354 ASTContext &C = CGM.getContext(); 3355 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3356 FunctionProtoType::ExtProtoInfo EPI; 3357 KmpRoutineEntryPtrQTy = C.getPointerType( 3358 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3359 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3360 } 3361 } 3362 3363 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3364 // Make sure the type of the entry is already created. This is the type we 3365 // have to create: 3366 // struct __tgt_offload_entry{ 3367 // void *addr; // Pointer to the offload entry info. 3368 // // (function or global) 3369 // char *name; // Name of the function or global. 3370 // size_t size; // Size of the entry info (0 if it a function). 3371 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3372 // int32_t reserved; // Reserved, to use by the runtime library. 
3373 // }; 3374 if (TgtOffloadEntryQTy.isNull()) { 3375 ASTContext &C = CGM.getContext(); 3376 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3377 RD->startDefinition(); 3378 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3379 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3380 addFieldToRecordDecl(C, RD, C.getSizeType()); 3381 addFieldToRecordDecl( 3382 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3383 addFieldToRecordDecl( 3384 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3385 RD->completeDefinition(); 3386 RD->addAttr(PackedAttr::CreateImplicit(C)); 3387 TgtOffloadEntryQTy = C.getRecordType(RD); 3388 } 3389 return TgtOffloadEntryQTy; 3390 } 3391 3392 namespace { 3393 struct PrivateHelpersTy { 3394 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3395 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3396 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3397 PrivateElemInit(PrivateElemInit) {} 3398 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3399 const Expr *OriginalRef = nullptr; 3400 const VarDecl *Original = nullptr; 3401 const VarDecl *PrivateCopy = nullptr; 3402 const VarDecl *PrivateElemInit = nullptr; 3403 bool isLocalPrivate() const { 3404 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3405 } 3406 }; 3407 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3408 } // anonymous namespace 3409 3410 static bool isAllocatableDecl(const VarDecl *VD) { 3411 const VarDecl *CVD = VD->getCanonicalDecl(); 3412 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3413 return false; 3414 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3415 // Use the default allocation. 
3416 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3417 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3418 !AA->getAllocator()); 3419 } 3420 3421 static RecordDecl * 3422 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3423 if (!Privates.empty()) { 3424 ASTContext &C = CGM.getContext(); 3425 // Build struct .kmp_privates_t. { 3426 // /* private vars */ 3427 // }; 3428 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3429 RD->startDefinition(); 3430 for (const auto &Pair : Privates) { 3431 const VarDecl *VD = Pair.second.Original; 3432 QualType Type = VD->getType().getNonReferenceType(); 3433 // If the private variable is a local variable with lvalue ref type, 3434 // allocate the pointer instead of the pointee type. 3435 if (Pair.second.isLocalPrivate()) { 3436 if (VD->getType()->isLValueReferenceType()) 3437 Type = C.getPointerType(Type); 3438 if (isAllocatableDecl(VD)) 3439 Type = C.getPointerType(Type); 3440 } 3441 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3442 if (VD->hasAttrs()) { 3443 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3444 E(VD->getAttrs().end()); 3445 I != E; ++I) 3446 FD->addAttr(*I); 3447 } 3448 } 3449 RD->completeDefinition(); 3450 return RD; 3451 } 3452 return nullptr; 3453 } 3454 3455 static RecordDecl * 3456 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3457 QualType KmpInt32Ty, 3458 QualType KmpRoutineEntryPointerQTy) { 3459 ASTContext &C = CGM.getContext(); 3460 // Build struct kmp_task_t { 3461 // void * shareds; 3462 // kmp_routine_entry_t routine; 3463 // kmp_int32 part_id; 3464 // kmp_cmplrdata_t data1; 3465 // kmp_cmplrdata_t data2; 3466 // For taskloops additional fields: 3467 // kmp_uint64 lb; 3468 // kmp_uint64 ub; 3469 // kmp_int64 st; 3470 // kmp_int32 liter; 3471 // void * reductions; 3472 // }; 3473 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3474 
UD->startDefinition(); 3475 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3476 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3477 UD->completeDefinition(); 3478 QualType KmpCmplrdataTy = C.getRecordType(UD); 3479 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3480 RD->startDefinition(); 3481 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3482 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3483 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3484 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3485 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3486 if (isOpenMPTaskLoopDirective(Kind)) { 3487 QualType KmpUInt64Ty = 3488 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3489 QualType KmpInt64Ty = 3490 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3491 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3492 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3493 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3494 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3495 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3496 } 3497 RD->completeDefinition(); 3498 return RD; 3499 } 3500 3501 static RecordDecl * 3502 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3503 ArrayRef<PrivateDataTy> Privates) { 3504 ASTContext &C = CGM.getContext(); 3505 // Build struct kmp_task_t_with_privates { 3506 // kmp_task_t task_data; 3507 // .kmp_privates_t. privates; 3508 // }; 3509 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3510 RD->startDefinition(); 3511 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3512 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3513 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3514 RD->completeDefinition(); 3515 return RD; 3516 } 3517 3518 /// Emit a proxy function which accepts kmp_task_t as the second 3519 /// argument. 
3520 /// \code 3521 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3522 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3523 /// For taskloops: 3524 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3525 /// tt->reductions, tt->shareds); 3526 /// return 0; 3527 /// } 3528 /// \endcode 3529 static llvm::Function * 3530 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3531 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3532 QualType KmpTaskTWithPrivatesPtrQTy, 3533 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3534 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3535 llvm::Value *TaskPrivatesMap) { 3536 ASTContext &C = CGM.getContext(); 3537 FunctionArgList Args; 3538 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3539 ImplicitParamDecl::Other); 3540 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3541 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3542 ImplicitParamDecl::Other); 3543 Args.push_back(&GtidArg); 3544 Args.push_back(&TaskTypeArg); 3545 const auto &TaskEntryFnInfo = 3546 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3547 llvm::FunctionType *TaskEntryTy = 3548 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3549 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3550 auto *TaskEntry = llvm::Function::Create( 3551 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3552 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3553 TaskEntry->setDoesNotRecurse(); 3554 CodeGenFunction CGF(CGM); 3555 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3556 Loc, Loc); 3557 3558 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3559 // tt, 3560 // For taskloops: 3561 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3562 // 
tt->task_data.shareds); 3563 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3564 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3565 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3566 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3567 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3568 const auto *KmpTaskTWithPrivatesQTyRD = 3569 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3570 LValue Base = 3571 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3572 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3573 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3574 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3575 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3576 3577 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3578 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3579 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3580 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3581 CGF.ConvertTypeForMem(SharedsPtrTy)); 3582 3583 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3584 llvm::Value *PrivatesParam; 3585 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3586 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3587 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3588 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3589 } else { 3590 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3591 } 3592 3593 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3594 TaskPrivatesMap, 3595 CGF.Builder 3596 .CreatePointerBitCastOrAddrSpaceCast( 3597 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3598 .getPointer()}; 3599 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3600 std::end(CommonArgs)); 3601 if (isOpenMPTaskLoopDirective(Kind)) { 3602 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3603 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3604 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3605 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3606 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3607 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3608 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3609 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3610 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3611 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3612 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3613 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3614 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3615 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3616 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3617 CallArgs.push_back(LBParam); 3618 CallArgs.push_back(UBParam); 3619 CallArgs.push_back(StParam); 3620 CallArgs.push_back(LIParam); 3621 CallArgs.push_back(RParam); 3622 } 3623 CallArgs.push_back(SharedsParam); 3624 3625 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3626 CallArgs); 3627 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3628 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3629 CGF.FinishFunction(); 3630 return TaskEntry; 3631 } 3632 3633 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3634 SourceLocation Loc, 3635 QualType KmpInt32Ty, 3636 QualType KmpTaskTWithPrivatesPtrQTy, 3637 QualType KmpTaskTWithPrivatesQTy) { 3638 ASTContext &C = CGM.getContext(); 3639 FunctionArgList Args; 3640 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3641 ImplicitParamDecl::Other); 3642 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3643 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3644 
ImplicitParamDecl::Other); 3645 Args.push_back(&GtidArg); 3646 Args.push_back(&TaskTypeArg); 3647 const auto &DestructorFnInfo = 3648 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3649 llvm::FunctionType *DestructorFnTy = 3650 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3651 std::string Name = 3652 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3653 auto *DestructorFn = 3654 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3655 Name, &CGM.getModule()); 3656 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3657 DestructorFnInfo); 3658 DestructorFn->setDoesNotRecurse(); 3659 CodeGenFunction CGF(CGM); 3660 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3661 Args, Loc, Loc); 3662 3663 LValue Base = CGF.EmitLoadOfPointerLValue( 3664 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3665 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3666 const auto *KmpTaskTWithPrivatesQTyRD = 3667 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3668 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3669 Base = CGF.EmitLValueForField(Base, *FI); 3670 for (const auto *Field : 3671 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3672 if (QualType::DestructionKind DtorKind = 3673 Field->getType().isDestructedType()) { 3674 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3675 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3676 } 3677 } 3678 CGF.FinishFunction(); 3679 return DestructorFn; 3680 } 3681 3682 /// Emit a privates mapping function for correct handling of private and 3683 /// firstprivate variables. 3684 /// \code 3685 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3686 /// **noalias priv1,..., <tyn> **noalias privn) { 3687 /// *priv1 = &.privates.priv1; 3688 /// ...; 3689 /// *privn = &.privates.privn; 3690 /// } 3691 /// \endcode 3692 static llvm::Value * 3693 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3694 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3695 ArrayRef<PrivateDataTy> Privates) { 3696 ASTContext &C = CGM.getContext(); 3697 FunctionArgList Args; 3698 ImplicitParamDecl TaskPrivatesArg( 3699 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3700 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3701 ImplicitParamDecl::Other); 3702 Args.push_back(&TaskPrivatesArg); 3703 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3704 unsigned Counter = 1; 3705 for (const Expr *E : Data.PrivateVars) { 3706 Args.push_back(ImplicitParamDecl::Create( 3707 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3708 C.getPointerType(C.getPointerType(E->getType())) 3709 .withConst() 3710 .withRestrict(), 3711 ImplicitParamDecl::Other)); 3712 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3713 PrivateVarsPos[VD] = Counter; 3714 ++Counter; 3715 } 3716 for (const Expr *E : Data.FirstprivateVars) { 3717 Args.push_back(ImplicitParamDecl::Create( 3718 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3719 C.getPointerType(C.getPointerType(E->getType())) 3720 .withConst() 3721 .withRestrict(), 3722 ImplicitParamDecl::Other)); 3723 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3724 PrivateVarsPos[VD] = Counter; 3725 ++Counter; 3726 } 3727 for (const Expr *E : Data.LastprivateVars) { 3728 Args.push_back(ImplicitParamDecl::Create( 3729 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3730 C.getPointerType(C.getPointerType(E->getType())) 3731 .withConst() 3732 .withRestrict(), 3733 ImplicitParamDecl::Other)); 3734 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3735 PrivateVarsPos[VD] = Counter; 3736 ++Counter; 3737 } 3738 for (const VarDecl *VD : 
Data.PrivateLocals) { 3739 QualType Ty = VD->getType().getNonReferenceType(); 3740 if (VD->getType()->isLValueReferenceType()) 3741 Ty = C.getPointerType(Ty); 3742 if (isAllocatableDecl(VD)) 3743 Ty = C.getPointerType(Ty); 3744 Args.push_back(ImplicitParamDecl::Create( 3745 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3746 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3747 ImplicitParamDecl::Other)); 3748 PrivateVarsPos[VD] = Counter; 3749 ++Counter; 3750 } 3751 const auto &TaskPrivatesMapFnInfo = 3752 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3753 llvm::FunctionType *TaskPrivatesMapTy = 3754 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3755 std::string Name = 3756 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3757 auto *TaskPrivatesMap = llvm::Function::Create( 3758 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3759 &CGM.getModule()); 3760 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3761 TaskPrivatesMapFnInfo); 3762 if (CGM.getLangOpts().Optimize) { 3763 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3764 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3765 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3766 } 3767 CodeGenFunction CGF(CGM); 3768 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3769 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3770 3771 // *privi = &.privates.privi; 3772 LValue Base = CGF.EmitLoadOfPointerLValue( 3773 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3774 TaskPrivatesArg.getType()->castAs<PointerType>()); 3775 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3776 Counter = 0; 3777 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3778 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3779 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3780 LValue RefLVal = 3781 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3782 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3783 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3784 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3785 ++Counter; 3786 } 3787 CGF.FinishFunction(); 3788 return TaskPrivatesMap; 3789 } 3790 3791 /// Emit initialization for private variables in task-based directives. 3792 static void emitPrivatesInit(CodeGenFunction &CGF, 3793 const OMPExecutableDirective &D, 3794 Address KmpTaskSharedsPtr, LValue TDBase, 3795 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3796 QualType SharedsTy, QualType SharedsPtrTy, 3797 const OMPTaskDataTy &Data, 3798 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3799 ASTContext &C = CGF.getContext(); 3800 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3801 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3802 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3803 ? OMPD_taskloop 3804 : OMPD_task; 3805 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3806 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3807 LValue SrcBase; 3808 bool IsTargetTask = 3809 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3810 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3811 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3812 // PointersArray, SizesArray, and MappersArray. The original variables for 3813 // these arrays are not captured and we get their addresses explicitly. 3814 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3815 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3816 SrcBase = CGF.MakeAddrLValue( 3817 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3818 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3819 SharedsTy); 3820 } 3821 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3822 for (const PrivateDataTy &Pair : Privates) { 3823 // Do not initialize private locals. 
3824 if (Pair.second.isLocalPrivate()) { 3825 ++FI; 3826 continue; 3827 } 3828 const VarDecl *VD = Pair.second.PrivateCopy; 3829 const Expr *Init = VD->getAnyInitializer(); 3830 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3831 !CGF.isTrivialInitializer(Init)))) { 3832 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3833 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3834 const VarDecl *OriginalVD = Pair.second.Original; 3835 // Check if the variable is the target-based BasePointersArray, 3836 // PointersArray, SizesArray, or MappersArray. 3837 LValue SharedRefLValue; 3838 QualType Type = PrivateLValue.getType(); 3839 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3840 if (IsTargetTask && !SharedField) { 3841 assert(isa<ImplicitParamDecl>(OriginalVD) && 3842 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3843 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3844 ->getNumParams() == 0 && 3845 isa<TranslationUnitDecl>( 3846 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3847 ->getDeclContext()) && 3848 "Expected artificial target data variable."); 3849 SharedRefLValue = 3850 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3851 } else if (ForDup) { 3852 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3853 SharedRefLValue = CGF.MakeAddrLValue( 3854 Address(SharedRefLValue.getPointer(CGF), 3855 C.getDeclAlign(OriginalVD)), 3856 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3857 SharedRefLValue.getTBAAInfo()); 3858 } else if (CGF.LambdaCaptureFields.count( 3859 Pair.second.Original->getCanonicalDecl()) > 0 || 3860 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3861 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3862 } else { 3863 // Processing for implicitly captured variables. 
3864 InlinedOpenMPRegionRAII Region( 3865 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3866 /*HasCancel=*/false, /*NoInheritance=*/true); 3867 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3868 } 3869 if (Type->isArrayType()) { 3870 // Initialize firstprivate array. 3871 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3872 // Perform simple memcpy. 3873 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3874 } else { 3875 // Initialize firstprivate array using element-by-element 3876 // initialization. 3877 CGF.EmitOMPAggregateAssign( 3878 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3879 Type, 3880 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3881 Address SrcElement) { 3882 // Clean up any temporaries needed by the initialization. 3883 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3884 InitScope.addPrivate( 3885 Elem, [SrcElement]() -> Address { return SrcElement; }); 3886 (void)InitScope.Privatize(); 3887 // Emit initialization for single element. 3888 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3889 CGF, &CapturesInfo); 3890 CGF.EmitAnyExprToMem(Init, DestElement, 3891 Init->getType().getQualifiers(), 3892 /*IsInitializer=*/false); 3893 }); 3894 } 3895 } else { 3896 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3897 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3898 return SharedRefLValue.getAddress(CGF); 3899 }); 3900 (void)InitScope.Privatize(); 3901 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3902 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3903 /*capturedByInit=*/false); 3904 } 3905 } else { 3906 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3907 } 3908 } 3909 ++FI; 3910 } 3911 } 3912 3913 /// Check if duplication function is required for taskloops. 
3914 static bool checkInitIsRequired(CodeGenFunction &CGF, 3915 ArrayRef<PrivateDataTy> Privates) { 3916 bool InitRequired = false; 3917 for (const PrivateDataTy &Pair : Privates) { 3918 if (Pair.second.isLocalPrivate()) 3919 continue; 3920 const VarDecl *VD = Pair.second.PrivateCopy; 3921 const Expr *Init = VD->getAnyInitializer(); 3922 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3923 !CGF.isTrivialInitializer(Init)); 3924 if (InitRequired) 3925 break; 3926 } 3927 return InitRequired; 3928 } 3929 3930 3931 /// Emit task_dup function (for initialization of 3932 /// private/firstprivate/lastprivate vars and last_iter flag) 3933 /// \code 3934 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3935 /// lastpriv) { 3936 /// // setup lastprivate flag 3937 /// task_dst->last = lastpriv; 3938 /// // could be constructor calls here... 3939 /// } 3940 /// \endcode 3941 static llvm::Value * 3942 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3943 const OMPExecutableDirective &D, 3944 QualType KmpTaskTWithPrivatesPtrQTy, 3945 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3946 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3947 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3948 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3949 ASTContext &C = CGM.getContext(); 3950 FunctionArgList Args; 3951 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3952 KmpTaskTWithPrivatesPtrQTy, 3953 ImplicitParamDecl::Other); 3954 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3955 KmpTaskTWithPrivatesPtrQTy, 3956 ImplicitParamDecl::Other); 3957 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3958 ImplicitParamDecl::Other); 3959 Args.push_back(&DstArg); 3960 Args.push_back(&SrcArg); 3961 Args.push_back(&LastprivArg); 3962 const auto &TaskDupFnInfo = 3963 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3964 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3965 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3966 auto *TaskDup = llvm::Function::Create( 3967 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3968 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3969 TaskDup->setDoesNotRecurse(); 3970 CodeGenFunction CGF(CGM); 3971 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3972 Loc); 3973 3974 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3975 CGF.GetAddrOfLocalVar(&DstArg), 3976 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3977 // task_dst->liter = lastpriv; 3978 if (WithLastIter) { 3979 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3980 LValue Base = CGF.EmitLValueForField( 3981 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3982 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3983 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3984 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3985 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3986 } 3987 3988 // Emit initial values for private copies (if any). 
3989 assert(!Privates.empty()); 3990 Address KmpTaskSharedsPtr = Address::invalid(); 3991 if (!Data.FirstprivateVars.empty()) { 3992 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3993 CGF.GetAddrOfLocalVar(&SrcArg), 3994 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3995 LValue Base = CGF.EmitLValueForField( 3996 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3997 KmpTaskSharedsPtr = Address( 3998 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3999 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4000 KmpTaskTShareds)), 4001 Loc), 4002 CGM.getNaturalTypeAlignment(SharedsTy)); 4003 } 4004 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4005 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4006 CGF.FinishFunction(); 4007 return TaskDup; 4008 } 4009 4010 /// Checks if destructor function is required to be generated. 4011 /// \return true if cleanups are required, false otherwise. 4012 static bool 4013 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4014 ArrayRef<PrivateDataTy> Privates) { 4015 for (const PrivateDataTy &P : Privates) { 4016 if (P.second.isLocalPrivate()) 4017 continue; 4018 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4019 if (Ty.isDestructedType()) 4020 return true; 4021 } 4022 return false; 4023 } 4024 4025 namespace { 4026 /// Loop generator for OpenMP iterator expression. 
/// Constructing the scope privatizes every iterator variable of the
/// expression together with its helper counter and emits one loop header per
/// iterator (outermost first), leaving the insertion point inside the
/// innermost "iter.body" block. Destroying the scope emits the matching
/// counter increments, back-branches and exit blocks, innermost first. When
/// the iterator expression is null both constructor and destructor do nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator "iter.cont" destinations (loop condition blocks).
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator "iter.exit" destinations (blocks after the loop).
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (no-op if \p E is
  /// null).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every upper bound and register private storage for
    // the iterator variable and its counter. The upper bounds are emitted
    // here, before Privatize() is called.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit one loop header per iterator.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost iterator first
  /// (no-op if the iterator expression is null).
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // I runs from numOfIterators() down to 1 so that index I - 1 never
    // underflows the unsigned counter.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost iterator's exit block is emitted as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size in bytes of the object denoted by \p E.
///
/// Three forms are distinguished: an array-shaping expression (address is the
/// value of its base, size is the pointee size times every dimension), an
/// array section (size is the byte distance between the lower address and one
/// element past the upper one), and anything else (address of the lvalue,
/// size of its type).
///
/// \return Pair of (address, size) values.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(*base) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Address of the section's upper-bound element.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
4129 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4130 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4131 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4132 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4133 } else { 4134 SizeVal = CGF.getTypeSize(Ty); 4135 } 4136 return std::make_pair(Addr, SizeVal); 4137 } 4138 4139 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4140 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4141 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4142 if (KmpTaskAffinityInfoTy.isNull()) { 4143 RecordDecl *KmpAffinityInfoRD = 4144 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4145 KmpAffinityInfoRD->startDefinition(); 4146 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4147 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4148 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4149 KmpAffinityInfoRD->completeDefinition(); 4150 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4151 } 4152 } 4153 4154 CGOpenMPRuntime::TaskResultTy 4155 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4156 const OMPExecutableDirective &D, 4157 llvm::Function *TaskFunction, QualType SharedsTy, 4158 Address Shareds, const OMPTaskDataTy &Data) { 4159 ASTContext &C = CGM.getContext(); 4160 llvm::SmallVector<PrivateDataTy, 4> Privates; 4161 // Aggregate privates and sort them by the alignment. 
const auto *I = Data.PrivateCopies.begin();
  // 'private' variables: original reference, original decl, private copy.
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // 'firstprivate' variables additionally carry the element-init decl, taken
  // from Data.FirstprivateInits in lockstep with the copies.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // 'lastprivate' variables.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the declaration's own alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment; the stable sort keeps the collection order for
  // entries of equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives and all
  // other directives use separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if there are any privates), cast to
  // the type of the task function's fourth argument; otherwise pass a null
  // pointer of that type.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the task-with-privates record supplies the
    // privates type.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime value and is handled separately below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select between FinalFlag and 0 at run
  // time; otherwise fold its integer part into a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used and
  // takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is an llvm::Value local to the branch and hides the
    // SourceLocation parameter of the same name.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the event handler's type and store it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier
    // (known at compile time); NumOfElements is the runtime part contributed
    // by clauses with an iterator modifier and stays null if there are none.
    // NOTE(review): for iterator clauses only the iterators' upper bounds are
    // multiplied together (across all such clauses); the clause's
    // varlist_size() is not factored in, although the fill loop below stores
    // one entry per list item per iteration - confirm the array cannot be
    // undersized.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = constant part + iterator part, emitted as
      // a variable-length array whose size expression is bound to that value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime call below receives the count as a 32-bit integer.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators. Only the base address and
    // length fields are stored; the flags field is not written here.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For iterator clauses the write position is only known at run time, so
    // it lives in a temporary initialized with the number of entries already
    // written above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object opens the iterator loops here and closes them when
      // it is destroyed at the end of this iteration of the clause loop.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++pos (in the emitted code).
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-with-privates record; its first field
  // is the kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer from the task descriptor and copy the
    // caller's shareds aggregate to where it points.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task-dup function is only emitted for taskloop directives that have
    // lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The union declaration obtained from the Data1 field is reused to pick
    // the member inside the Data2 field.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
///
/// 'in' maps to DepIn, 'out' and 'inout' both map to DepInOut and
/// 'mutexinoutset' maps to DepMutexInOutSet; every other kind is treated as
/// unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // The remaining kinds are not expected to reach this function.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
///
/// \param C AST context used to create the types.
/// \param KmpDependInfoTy In/out cache for the kmp_depend_info record type;
/// only assigned when it is null on entry. The record gets three fields, in
/// order: intptr type, size type, flags type.
/// \param FlagsTy Out parameter, always assigned: the unsigned integer type
/// with the same bit width as the context's bool type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Emits code that reads the dependency array referenced by a depobj.
///
/// The pointer stored in \p DepobjLVal is loaded and viewed as a pointer to
/// kmp_depend_info. The element count is read from the base_addr field of the
/// record located one element before the pointed-to one.
///
/// \param CGF Function in which the code is emitted.
/// \param DepobjLVal LValue of the depobj variable.
/// \param Loc Location used for the load of the element count.
/// \return Pair of (number of dependencies, lvalue of the pointed-to
/// kmp_depend_info element).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the pointer held by the depobj variable and retype it as
  // kmp_depend_info *.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element: deps[-1] carries the element count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits stores that fill kmp_depend_info entries of \p DependenciesArray
/// for every expression in \p Data.DepExprs.
///
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param Pos Current write position. When it holds an 'unsigned *', the
/// index is a compile-time value that is incremented directly; when it holds
/// an 'LValue *', the index is loaded from, incremented in and stored back to
/// that lvalue by emitted code.
/// \param Data Dependence data; its iterator expression (if any) wraps the
/// stores in the corresponding iterator loops.
/// \param DependenciesArray Array receiving the entries.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // A null iterator expression makes the scope a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ?
Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Address of the entry to fill: constant index or runtime index,
    // depending on what Pos holds.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: directly for the compile-time counter, through
    // emitted load/add/store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// Emits code computing, for every depobj expression in \p Data.DepExprs,
/// the number of dependency entries it refers to.
///
/// For each expression a temporary named "depobj.size.addr" is created and
/// the count read from the depobj is added to it inside the iterator loops
/// (if any); after the loops are closed the temporaries are loaded.
///
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param Data Dependence data; must be of depobj kind.
/// \return One loaded size value per expression, in expression order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // The extra braces end the iterator scope (closing its loops) before the
  // accumulated sizes are loaded below.
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj pointer and retype it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // deps[-1] carries the element count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // size += NumDeps, with the temporary's initial value being 0.
      // NOTE(review): presumably InitTempAlloca performs the zero store
      // outside the iterator loop so that the additions accumulate across
      // iterations - confirm against its definition.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read back the final sizes after the iterator loops have been closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Emits code copying the dependency entries of every depobj in
/// \p Data.DepExprs into \p DependenciesArray.
///
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy In/out cache of the kmp_depend_info record type.
/// \param PosLVal Runtime write position (in elements); it is loaded before
/// each copy and advanced by the number of copied entries afterwards.
/// \param Data Dependence data; must be of depobj kind.
/// \param DependenciesArray Destination array.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of one kmp_depend_info entry.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj pointer and retype it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte count = entry size * number of entries; the destination is the
      // array element at the current runtime position.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      // (The position counts entries, so it advances by NumDeps, not by the
      // byte size used for the copy.)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit when no dependence entry carries any expression.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: expressions of entries that are neither depobj nor
  // iterator-based.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size())); 4818 }); 4819 QualType FlagsTy; 4820 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4821 bool HasDepobjDeps = false; 4822 bool HasRegularWithIterators = false; 4823 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4824 llvm::Value *NumOfRegularWithIterators = 4825 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4826 // Calculate number of depobj dependecies and regular deps with the iterators. 4827 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4828 if (D.DepKind == OMPC_DEPEND_depobj) { 4829 SmallVector<llvm::Value *, 4> Sizes = 4830 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4831 for (llvm::Value *Size : Sizes) { 4832 NumOfDepobjElements = 4833 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4834 } 4835 HasDepobjDeps = true; 4836 continue; 4837 } 4838 // Include number of iterations, if any. 4839 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4840 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4841 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4842 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4843 NumOfRegularWithIterators = 4844 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4845 } 4846 HasRegularWithIterators = true; 4847 continue; 4848 } 4849 } 4850 4851 QualType KmpDependInfoArrayTy; 4852 if (HasDepobjDeps || HasRegularWithIterators) { 4853 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4854 /*isSigned=*/false); 4855 if (HasDepobjDeps) { 4856 NumOfElements = 4857 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4858 } 4859 if (HasRegularWithIterators) { 4860 NumOfElements = 4861 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4862 } 4863 OpaqueValueExpr OVE(Loc, 4864 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4865 VK_RValue); 4866 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4867 RValue::get(NumOfElements)); 4868 
KmpDependInfoArrayTy = 4869 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4870 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4871 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4872 // Properly emit variable-sized array. 4873 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4874 ImplicitParamDecl::Other); 4875 CGF.EmitVarDecl(*PD); 4876 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4877 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4878 /*isSigned=*/false); 4879 } else { 4880 KmpDependInfoArrayTy = C.getConstantArrayType( 4881 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4882 ArrayType::Normal, /*IndexTypeQuals=*/0); 4883 DependenciesArray = 4884 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4885 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4886 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4887 /*isSigned=*/false); 4888 } 4889 unsigned Pos = 0; 4890 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4891 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4892 Dependencies[I].IteratorExpr) 4893 continue; 4894 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4895 DependenciesArray); 4896 } 4897 // Copy regular dependecies with iterators. 4898 LValue PosLVal = CGF.MakeAddrLValue( 4899 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4900 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4901 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4902 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4903 !Dependencies[I].IteratorExpr) 4904 continue; 4905 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4906 DependenciesArray); 4907 } 4908 // Copy final depobj arrays without iterators. 
4909 if (HasDepobjDeps) { 4910 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4911 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4912 continue; 4913 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4914 DependenciesArray); 4915 } 4916 } 4917 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4918 DependenciesArray, CGF.VoidPtrTy); 4919 return std::make_pair(NumOfElements, DependenciesArray); 4920 } 4921 4922 Address CGOpenMPRuntime::emitDepobjDependClause( 4923 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4924 SourceLocation Loc) { 4925 if (Dependencies.DepExprs.empty()) 4926 return Address::invalid(); 4927 // Process list of dependencies. 4928 ASTContext &C = CGM.getContext(); 4929 Address DependenciesArray = Address::invalid(); 4930 unsigned NumDependencies = Dependencies.DepExprs.size(); 4931 QualType FlagsTy; 4932 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4933 RecordDecl *KmpDependInfoRD = 4934 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4935 4936 llvm::Value *Size; 4937 // Define type kmp_depend_info[<Dependencies.size()>]; 4938 // For depobj reserve one extra element to store the number of elements. 4939 // It is required to handle depobj(x) update(in) construct. 
// kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the entry count is the product of the
    // iterators' trip counts and is only known at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size in bytes: (1 + NumDeps) records, the extra one being the
    // leading count record.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Without iterators the size is a compile-time constant:
    // sizeof(kmp_depend_info[NumDependencies + 1]).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // __kmpc_alloc(gtid, size, nullptr);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDepsVal;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Real entries start at index 1, right after the count record. The position
  // is a runtime counter when iterators are present and a plain compile-time
  // index otherwise.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Hand out the address of element 1, i.e. skip the leading count record.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emit code that releases the storage referenced by a depobj variable.
///
/// The pointer stored in \p DepobjLVal is loaded, moved back by one
/// kmp_depend_info record, and the resulting address is passed to __kmpc_free
/// with a null allocator argument.
///
/// \param DepobjLVal L-value of the depobj variable.
/// \param Loc Location used to obtain the global thread id.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one record from the depobj pointer; that address is what gets
  // freed.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emit a loop that stores the runtime flag value corresponding to
/// \p NewDepKind into the flags field of every kmp_depend_info record of the
/// depobj referenced by \p DepobjLVal.
///
/// \param DepobjLVal L-value of the depobj variable.
/// \param NewDepKind Dependence kind to store in each record.
/// \param Loc Location forwarded to getDepobjElements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The loop is bottom-tested: the body block is entered without an emptiness
  // check, so it runs at least once.
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // The PHI yields the current element: Begin on entry, the next element on
  // the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Leave the loop once the next element is the one-past-the-end address.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the runtime calls that launch a task created for directive \p D.
///
/// After emitTaskInit and emitDependClause, two code paths are prepared:
///  - "then": for untied tasks store 0 into the part_id field, then call
///    __kmpc_omp_task_with_deps when Data.Dependences is non-empty or
///    __kmpc_omp_task otherwise, and finally emit the untied switch of an
///    enclosing OpenMP region, if there is one.
///  - "else": call __kmpc_omp_wait_deps when Data.Dependences is non-empty,
///    then call the task entry directly, bracketed by
///    __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
/// With \p IfCond both paths are emitted under emitIfClause; without it only
/// the "then" path is emitted.
///
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param Data Task data (dependences, tied flag, ...).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; the noalias list is always passed
  // as (0, null).
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // The argument arrays are captured by reference; the lambdas are invoked
  // before this function returns.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied task: store 0 into part_id before the task is enqueued.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; same dependence list, no task pointer.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the __kmpc_taskloop call for taskloop directive \p D.
///
/// After emitTaskInit, the initializers of the directive's lower-bound,
/// upper-bound and stride variables are emitted into the corresponding fields
/// of the task descriptor, the reductions field is set to Data.Reductions (or
/// null-initialized), and the runtime entry is called.
///
/// \param IfCond Condition of the 'if' clause, or null; without it the value 1
/// is passed as if_val.
/// \param Data Task data; Data.Schedule selects between no schedule,
/// grainsize and num_tasks.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // descriptor from the initializers of the directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument. When a schedule expression is present,
  // the integer bit of Data.Schedule selects NumTasks over Grainsize.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Passed through unchanged to \p RedOpGen.
/// \param EExpr Passed through unchanged to \p RedOpGen.
/// \param UpExpr Passed through unchanged to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // One PHI per side tracks the current element; both advance in lock step.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Remap LHSVar/RHSVar to the current elements for the duration of the
  // reduction operation.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param ReductionOp Reduction operation to emit. It is treated as a UDR
/// combiner call when it is a CallExpr whose callee is an OpaqueValueExpr
/// that wraps a reference to an OMPDeclareReductionDecl.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Bind the opaque callee to the first function of the pair returned
          // by getUserDefinedReduction, then emit the call expression.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the internal-linkage helper
/// 'void reduction_func(void *LHSArg, void *RHSArg)' that combines two lists
/// of reduction items.
///
/// Both arguments are cast to \p ArgsType. For each reduction the LHS and RHS
/// helper variables are remapped to the corresponding list elements; for a
/// variably modified private type the following list slot is read as the
/// array size. The combiners in \p ReductionOps are then emitted in order.
///
/// \param Loc Location used for the function and its parameters.
/// \param ArgsType LLVM type the two void* arguments are cast to.
/// \param Privates Private copies; only their types are inspected here.
/// \param LHSExprs References to the LHS helper variables.
/// \param RHSExprs References to the RHS helper variables.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \return The emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper is emitted with its own CodeGenFunction, independent of the
  // caller's.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument lists; it runs ahead of I because a
  // variably modified item occupies an extra slot for its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is stored in the next slot of the LHS list as a pointer
      // value; convert it back to an integer and bind it to the opaque size
      // expression of the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiner of every reduction item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner for a single reduction item.
///
/// If \p PrivateRef has array type the combiner is emitted once per element
/// through EmitOMPAggregateReduction; otherwise it is emitted directly.
///
/// \param ReductionOp Combiner expression.
/// \param PrivateRef Private copy; only its type is inspected.
/// \param LHS Reference to the LHS helper variable.
/// \param RHS Reference to the RHS helper variable.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
5539 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5540 // ... 5541 5542 ASTContext &C = CGM.getContext(); 5543 5544 if (SimpleReduction) { 5545 CodeGenFunction::RunCleanupsScope Scope(CGF); 5546 auto IPriv = Privates.begin(); 5547 auto ILHS = LHSExprs.begin(); 5548 auto IRHS = RHSExprs.begin(); 5549 for (const Expr *E : ReductionOps) { 5550 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5551 cast<DeclRefExpr>(*IRHS)); 5552 ++IPriv; 5553 ++ILHS; 5554 ++IRHS; 5555 } 5556 return; 5557 } 5558 5559 // 1. Build a list of reduction variables. 5560 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5561 auto Size = RHSExprs.size(); 5562 for (const Expr *E : Privates) { 5563 if (E->getType()->isVariablyModifiedType()) 5564 // Reserve place for array size. 5565 ++Size; 5566 } 5567 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5568 QualType ReductionArrayTy = 5569 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5570 /*IndexTypeQuals=*/0); 5571 Address ReductionList = 5572 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5573 auto IPriv = Privates.begin(); 5574 unsigned Idx = 0; 5575 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5576 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5577 CGF.Builder.CreateStore( 5578 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5579 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5580 Elem); 5581 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5582 // Store array size. 5583 ++Idx; 5584 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5585 llvm::Value *Size = CGF.Builder.CreateIntCast( 5586 CGF.getVLASize( 5587 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5588 .NumElts, 5589 CGF.SizeTy, /*isSigned=*/false); 5590 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5591 Elem); 5592 } 5593 } 5594 5595 // 2. 
Emit reduce_func(). 5596 llvm::Function *ReductionFn = emitReductionFunction( 5597 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5598 LHSExprs, RHSExprs, ReductionOps); 5599 5600 // 3. Create static kmp_critical_name lock = { 0 }; 5601 std::string Name = getName({"reduction"}); 5602 llvm::Value *Lock = getCriticalRegionLock(Name); 5603 5604 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5605 // RedList, reduce_func, &<lock>); 5606 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5607 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5608 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5609 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5610 ReductionList.getPointer(), CGF.VoidPtrTy); 5611 llvm::Value *Args[] = { 5612 IdentTLoc, // ident_t *<loc> 5613 ThreadId, // i32 <gtid> 5614 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5615 ReductionArrayTySize, // size_type sizeof(RedList) 5616 RL, // void *RedList 5617 ReductionFn, // void (*) (void *, void *) <reduce_func> 5618 Lock // kmp_critical_name *&<lock> 5619 }; 5620 llvm::Value *Res = CGF.EmitRuntimeCall( 5621 OMPBuilder.getOrCreateRuntimeFunction( 5622 CGM.getModule(), 5623 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5624 Args); 5625 5626 // 5. Build switch(res) 5627 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5628 llvm::SwitchInst *SwInst = 5629 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5630 5631 // 6. Build case 1: 5632 // ... 5633 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5634 // ... 
5635 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5636 // break; 5637 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5638 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5639 CGF.EmitBlock(Case1BB); 5640 5641 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5642 llvm::Value *EndArgs[] = { 5643 IdentTLoc, // ident_t *<loc> 5644 ThreadId, // i32 <gtid> 5645 Lock // kmp_critical_name *&<lock> 5646 }; 5647 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5648 CodeGenFunction &CGF, PrePostActionTy &Action) { 5649 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5650 auto IPriv = Privates.begin(); 5651 auto ILHS = LHSExprs.begin(); 5652 auto IRHS = RHSExprs.begin(); 5653 for (const Expr *E : ReductionOps) { 5654 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5655 cast<DeclRefExpr>(*IRHS)); 5656 ++IPriv; 5657 ++ILHS; 5658 ++IRHS; 5659 } 5660 }; 5661 RegionCodeGenTy RCG(CodeGen); 5662 CommonActionTy Action( 5663 nullptr, llvm::None, 5664 OMPBuilder.getOrCreateRuntimeFunction( 5665 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5666 : OMPRTL___kmpc_end_reduce), 5667 EndArgs); 5668 RCG.setAction(Action); 5669 RCG(CGF); 5670 5671 CGF.EmitBranch(DefaultBB); 5672 5673 // 7. Build case 2: 5674 // ... 5675 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5676 // ... 
5677 // break; 5678 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5679 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5680 CGF.EmitBlock(Case2BB); 5681 5682 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5683 CodeGenFunction &CGF, PrePostActionTy &Action) { 5684 auto ILHS = LHSExprs.begin(); 5685 auto IRHS = RHSExprs.begin(); 5686 auto IPriv = Privates.begin(); 5687 for (const Expr *E : ReductionOps) { 5688 const Expr *XExpr = nullptr; 5689 const Expr *EExpr = nullptr; 5690 const Expr *UpExpr = nullptr; 5691 BinaryOperatorKind BO = BO_Comma; 5692 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5693 if (BO->getOpcode() == BO_Assign) { 5694 XExpr = BO->getLHS(); 5695 UpExpr = BO->getRHS(); 5696 } 5697 } 5698 // Try to emit update expression as a simple atomic. 5699 const Expr *RHSExpr = UpExpr; 5700 if (RHSExpr) { 5701 // Analyze RHS part of the whole expression. 5702 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5703 RHSExpr->IgnoreParenImpCasts())) { 5704 // If this is a conditional operator, analyze its condition for 5705 // min/max reduction operator. 
5706 RHSExpr = ACO->getCond(); 5707 } 5708 if (const auto *BORHS = 5709 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5710 EExpr = BORHS->getRHS(); 5711 BO = BORHS->getOpcode(); 5712 } 5713 } 5714 if (XExpr) { 5715 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5716 auto &&AtomicRedGen = [BO, VD, 5717 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5718 const Expr *EExpr, const Expr *UpExpr) { 5719 LValue X = CGF.EmitLValue(XExpr); 5720 RValue E; 5721 if (EExpr) 5722 E = CGF.EmitAnyExpr(EExpr); 5723 CGF.EmitOMPAtomicSimpleUpdateExpr( 5724 X, E, BO, /*IsXLHSInRHSPart=*/true, 5725 llvm::AtomicOrdering::Monotonic, Loc, 5726 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5727 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5728 PrivateScope.addPrivate( 5729 VD, [&CGF, VD, XRValue, Loc]() { 5730 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5731 CGF.emitOMPSimpleStore( 5732 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5733 VD->getType().getNonReferenceType(), Loc); 5734 return LHSTemp; 5735 }); 5736 (void)PrivateScope.Privatize(); 5737 return CGF.EmitAnyExpr(UpExpr); 5738 }); 5739 }; 5740 if ((*IPriv)->getType()->isArrayType()) { 5741 // Emit atomic reduction for array section. 5742 const auto *RHSVar = 5743 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5744 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5745 AtomicRedGen, XExpr, EExpr, UpExpr); 5746 } else { 5747 // Emit atomic reduction for array subscript or single variable. 5748 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5749 } 5750 } else { 5751 // Emit as a critical region. 
5752 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5753 const Expr *, const Expr *) { 5754 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5755 std::string Name = RT.getName({"atomic_reduction"}); 5756 RT.emitCriticalRegion( 5757 CGF, Name, 5758 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5759 Action.Enter(CGF); 5760 emitReductionCombiner(CGF, E); 5761 }, 5762 Loc); 5763 }; 5764 if ((*IPriv)->getType()->isArrayType()) { 5765 const auto *LHSVar = 5766 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5767 const auto *RHSVar = 5768 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5769 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5770 CritRedGen); 5771 } else { 5772 CritRedGen(CGF, nullptr, nullptr, nullptr); 5773 } 5774 } 5775 ++ILHS; 5776 ++IRHS; 5777 ++IPriv; 5778 } 5779 }; 5780 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5781 if (!WithNowait) { 5782 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5783 llvm::Value *EndArgs[] = { 5784 IdentTLoc, // ident_t *<loc> 5785 ThreadId, // i32 <gtid> 5786 Lock // kmp_critical_name *&<lock> 5787 }; 5788 CommonActionTy Action(nullptr, llvm::None, 5789 OMPBuilder.getOrCreateRuntimeFunction( 5790 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5791 EndArgs); 5792 AtomicRCG.setAction(Action); 5793 AtomicRCG(CGF); 5794 } else { 5795 AtomicRCG(CGF); 5796 } 5797 5798 CGF.EmitBranch(DefaultBB); 5799 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5800 } 5801 5802 /// Generates unique name for artificial threadprivate variables. 5803 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5804 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5805 const Expr *Ref) { 5806 SmallString<256> Buffer; 5807 llvm::raw_svector_ostream Out(Buffer); 5808 const clang::DeclRefExpr *DE; 5809 const VarDecl *D = ::getBaseDecl(Ref, DE); 5810 if (!D) 5811 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5812 D = D->getCanonicalDecl(); 5813 std::string Name = CGM.getOpenMPRuntime().getName( 5814 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5815 Out << Prefix << Name << "_" 5816 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5817 return std::string(Out.str()); 5818 } 5819 5820 /// Emits reduction initializer function: 5821 /// \code 5822 /// void @.red_init(void* %arg, void* %orig) { 5823 /// %0 = bitcast void* %arg to <type>* 5824 /// store <type> <init>, <type>* %0 5825 /// ret void 5826 /// } 5827 /// \endcode 5828 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5829 SourceLocation Loc, 5830 ReductionCodeGen &RCG, unsigned N) { 5831 ASTContext &C = CGM.getContext(); 5832 QualType VoidPtrTy = C.VoidPtrTy; 5833 VoidPtrTy.addRestrict(); 5834 FunctionArgList Args; 5835 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5836 ImplicitParamDecl::Other); 5837 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5838 ImplicitParamDecl::Other); 5839 Args.emplace_back(&Param); 5840 Args.emplace_back(&ParamOrig); 5841 const auto &FnInfo = 5842 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5843 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5844 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5845 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5846 Name, &CGM.getModule()); 5847 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5848 Fn->setDoesNotRecurse(); 5849 CodeGenFunction CGF(CGM); 5850 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5851 Address PrivateAddr = CGF.EmitLoadOfPointer( 5852 CGF.GetAddrOfLocalVar(&Param), 5853 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5854 llvm::Value *Size = nullptr; 5855 // If the size of the reduction item is non-constant, load it from global 5856 // threadprivate variable. 5857 if (RCG.getSizes(N).second) { 5858 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5859 CGF, CGM.getContext().getSizeType(), 5860 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5861 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5862 CGM.getContext().getSizeType(), Loc); 5863 } 5864 RCG.emitAggregateType(CGF, N, Size); 5865 LValue OrigLVal; 5866 // If initializer uses initializer from declare reduction construct, emit a 5867 // pointer to the address of the original reduction item (reuired by reduction 5868 // initializer) 5869 if (RCG.usesReductionInitializer(N)) { 5870 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5871 SharedAddr = CGF.EmitLoadOfPointer( 5872 SharedAddr, 5873 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5874 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5875 } else { 5876 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5877 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5878 CGM.getContext().VoidPtrTy); 5879 } 5880 // Emit the initializer: 5881 // %0 = bitcast void* %arg to <type>* 5882 // store <type> <init>, <type>* %0 5883 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5884 [](CodeGenFunction &) { return false; }); 5885 CGF.FinishFunction(); 5886 return Fn; 5887 } 5888 5889 /// Emits reduction combiner function: 5890 /// \code 5891 /// void @.red_comb(void* %arg0, void* %arg1) { 5892 /// %lhs = bitcast void* %arg0 to <type>* 5893 /// %rhs = bitcast void* %arg1 to <type>* 5894 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5895 /// store <type> %2, <type>* %lhs 5896 /// 
ret void 5897 /// } 5898 /// \endcode 5899 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5900 SourceLocation Loc, 5901 ReductionCodeGen &RCG, unsigned N, 5902 const Expr *ReductionOp, 5903 const Expr *LHS, const Expr *RHS, 5904 const Expr *PrivateRef) { 5905 ASTContext &C = CGM.getContext(); 5906 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5907 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5908 FunctionArgList Args; 5909 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5910 C.VoidPtrTy, ImplicitParamDecl::Other); 5911 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5912 ImplicitParamDecl::Other); 5913 Args.emplace_back(&ParamInOut); 5914 Args.emplace_back(&ParamIn); 5915 const auto &FnInfo = 5916 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5917 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5918 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5919 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5920 Name, &CGM.getModule()); 5921 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5922 Fn->setDoesNotRecurse(); 5923 CodeGenFunction CGF(CGM); 5924 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5925 llvm::Value *Size = nullptr; 5926 // If the size of the reduction item is non-constant, load it from global 5927 // threadprivate variable. 5928 if (RCG.getSizes(N).second) { 5929 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5930 CGF, CGM.getContext().getSizeType(), 5931 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5932 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5933 CGM.getContext().getSizeType(), Loc); 5934 } 5935 RCG.emitAggregateType(CGF, N, Size); 5936 // Remap lhs and rhs variables to the addresses of the function arguments. 
5937 // %lhs = bitcast void* %arg0 to <type>* 5938 // %rhs = bitcast void* %arg1 to <type>* 5939 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5940 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5941 // Pull out the pointer to the variable. 5942 Address PtrAddr = CGF.EmitLoadOfPointer( 5943 CGF.GetAddrOfLocalVar(&ParamInOut), 5944 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5945 return CGF.Builder.CreateElementBitCast( 5946 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5947 }); 5948 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5949 // Pull out the pointer to the variable. 5950 Address PtrAddr = CGF.EmitLoadOfPointer( 5951 CGF.GetAddrOfLocalVar(&ParamIn), 5952 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5953 return CGF.Builder.CreateElementBitCast( 5954 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5955 }); 5956 PrivateScope.Privatize(); 5957 // Emit the combiner body: 5958 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5959 // store <type> %2, <type>* %lhs 5960 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5961 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5962 cast<DeclRefExpr>(RHS)); 5963 CGF.FinishFunction(); 5964 return Fn; 5965 } 5966 5967 /// Emits reduction finalizer function: 5968 /// \code 5969 /// void @.red_fini(void* %arg) { 5970 /// %0 = bitcast void* %arg to <type>* 5971 /// <destroy>(<type>* %0) 5972 /// ret void 5973 /// } 5974 /// \endcode 5975 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5976 SourceLocation Loc, 5977 ReductionCodeGen &RCG, unsigned N) { 5978 if (!RCG.needCleanups(N)) 5979 return nullptr; 5980 ASTContext &C = CGM.getContext(); 5981 FunctionArgList Args; 5982 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5983 ImplicitParamDecl::Other); 5984 Args.emplace_back(&Param); 5985 const auto &FnInfo = 5986 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5987 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5988 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5989 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5990 Name, &CGM.getModule()); 5991 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5992 Fn->setDoesNotRecurse(); 5993 CodeGenFunction CGF(CGM); 5994 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5995 Address PrivateAddr = CGF.EmitLoadOfPointer( 5996 CGF.GetAddrOfLocalVar(&Param), 5997 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5998 llvm::Value *Size = nullptr; 5999 // If the size of the reduction item is non-constant, load it from global 6000 // threadprivate variable. 6001 if (RCG.getSizes(N).second) { 6002 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6003 CGF, CGM.getContext().getSizeType(), 6004 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6005 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6006 CGM.getContext().getSizeType(), Loc); 6007 } 6008 RCG.emitAggregateType(CGF, N, Size); 6009 // Emit the finalizer body: 6010 // <destroy>(<type>* %0) 6011 RCG.emitCleanups(CGF, N, PrivateAddr); 6012 CGF.FinishFunction(Loc); 6013 return Fn; 6014 } 6015 6016 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6017 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6018 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6019 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6020 return nullptr; 6021 6022 // Build typedef struct: 6023 // kmp_taskred_input { 6024 // void *reduce_shar; // shared reduction item 6025 // void *reduce_orig; // original reduction item used for initialization 6026 // size_t reduce_size; // size of data item 6027 // void *reduce_init; // data initialization routine 6028 // void *reduce_fini; // data finalization routine 6029 // void *reduce_comb; // data combiner routine 6030 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6031 // } kmp_taskred_input_t; 6032 ASTContext &C = CGM.getContext(); 6033 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6034 RD->startDefinition(); 6035 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6036 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6037 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6038 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6039 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6040 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6041 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6042 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6043 RD->completeDefinition(); 6044 QualType RDType = C.getRecordType(RD); 6045 unsigned Size = Data.ReductionVars.size(); 6046 llvm::APInt ArraySize(/*numBits=*/64, Size); 6047 QualType ArrayRDType = C.getConstantArrayType( 6048 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6049 // kmp_task_red_input_t .rd_input.[Size]; 6050 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6051 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6052 Data.ReductionCopies, Data.ReductionOps); 6053 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6054 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6055 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6056 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6057 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6058 TaskRedInput.getPointer(), Idxs, 6059 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6060 ".rd_input.gep."); 6061 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6062 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6063 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6064 RCG.emitSharedOrigLValue(CGF, Cnt); 6065 llvm::Value *CastedShared = 6066 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6067 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6068 // ElemLVal.reduce_orig = &Origs[Cnt]; 6069 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6070 llvm::Value *CastedOrig = 6071 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6072 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6073 RCG.emitAggregateType(CGF, Cnt); 6074 llvm::Value *SizeValInChars; 6075 llvm::Value *SizeVal; 6076 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6077 // We use delayed creation/initialization for VLAs and array sections. It is 6078 // required because runtime does not provide the way to pass the sizes of 6079 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6080 // threadprivate global variables are used to store these values and use 6081 // them in the functions. 6082 bool DelayedCreation = !!SizeVal; 6083 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6084 /*isSigned=*/false); 6085 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6086 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6087 // ElemLVal.reduce_init = init; 6088 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6089 llvm::Value *InitAddr = 6090 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6091 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6092 // ElemLVal.reduce_fini = fini; 6093 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6094 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6095 llvm::Value *FiniAddr = Fini 6096 ? 
CGF.EmitCastToVoidPtr(Fini) 6097 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6098 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6099 // ElemLVal.reduce_comb = comb; 6100 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6101 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6102 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6103 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6104 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6105 // ElemLVal.flags = 0; 6106 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6107 if (DelayedCreation) { 6108 CGF.EmitStoreOfScalar( 6109 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6110 FlagsLVal); 6111 } else 6112 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6113 FlagsLVal.getType()); 6114 } 6115 if (Data.IsReductionWithTaskMod) { 6116 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6117 // is_ws, int num, void *data); 6118 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6119 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6120 CGM.IntTy, /*isSigned=*/true); 6121 llvm::Value *Args[] = { 6122 IdentTLoc, GTid, 6123 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6124 /*isSigned=*/true), 6125 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6126 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6127 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6128 return CGF.EmitRuntimeCall( 6129 OMPBuilder.getOrCreateRuntimeFunction( 6130 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6131 Args); 6132 } 6133 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6134 llvm::Value *Args[] = { 6135 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6136 /*isSigned=*/true), 6137 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6138 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6139 CGM.VoidPtrTy)}; 6140 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6141 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6142 Args); 6143 } 6144 6145 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6146 SourceLocation Loc, 6147 bool IsWorksharingReduction) { 6148 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6149 // is_ws, int num, void *data); 6150 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6151 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6152 CGM.IntTy, /*isSigned=*/true); 6153 llvm::Value *Args[] = {IdentTLoc, GTid, 6154 llvm::ConstantInt::get(CGM.IntTy, 6155 IsWorksharingReduction ? 1 : 0, 6156 /*isSigned=*/true)}; 6157 (void)CGF.EmitRuntimeCall( 6158 OMPBuilder.getOrCreateRuntimeFunction( 6159 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6160 Args); 6161 } 6162 6163 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6164 SourceLocation Loc, 6165 ReductionCodeGen &RCG, 6166 unsigned N) { 6167 auto Sizes = RCG.getSizes(N); 6168 // Emit threadprivate global variable if the type is non-constant 6169 // (Sizes.second = nullptr). 
6170 if (Sizes.second) { 6171 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6172 /*isSigned=*/false); 6173 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6174 CGF, CGM.getContext().getSizeType(), 6175 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6176 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6177 } 6178 } 6179 6180 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6181 SourceLocation Loc, 6182 llvm::Value *ReductionsPtr, 6183 LValue SharedLVal) { 6184 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6185 // *d); 6186 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6187 CGM.IntTy, 6188 /*isSigned=*/true), 6189 ReductionsPtr, 6190 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6191 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6192 return Address( 6193 CGF.EmitRuntimeCall( 6194 OMPBuilder.getOrCreateRuntimeFunction( 6195 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6196 Args), 6197 SharedLVal.getAlignment()); 6198 } 6199 6200 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6201 SourceLocation Loc) { 6202 if (!CGF.HaveInsertPoint()) 6203 return; 6204 6205 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6206 OMPBuilder.createTaskwait(CGF.Builder); 6207 } else { 6208 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6209 // global_tid); 6210 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6211 // Ignore return result until untied tasks are supported. 
6212 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6213 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6214 Args); 6215 } 6216 6217 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6218 Region->emitUntiedSwitch(CGF); 6219 } 6220 6221 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6222 OpenMPDirectiveKind InnerKind, 6223 const RegionCodeGenTy &CodeGen, 6224 bool HasCancel) { 6225 if (!CGF.HaveInsertPoint()) 6226 return; 6227 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6228 InnerKind != OMPD_critical && 6229 InnerKind != OMPD_master); 6230 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6231 } 6232 6233 namespace { 6234 enum RTCancelKind { 6235 CancelNoreq = 0, 6236 CancelParallel = 1, 6237 CancelLoop = 2, 6238 CancelSections = 3, 6239 CancelTaskgroup = 4 6240 }; 6241 } // anonymous namespace 6242 6243 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6244 RTCancelKind CancelKind = CancelNoreq; 6245 if (CancelRegion == OMPD_parallel) 6246 CancelKind = CancelParallel; 6247 else if (CancelRegion == OMPD_for) 6248 CancelKind = CancelLoop; 6249 else if (CancelRegion == OMPD_sections) 6250 CancelKind = CancelSections; 6251 else { 6252 assert(CancelRegion == OMPD_taskgroup); 6253 CancelKind = CancelTaskgroup; 6254 } 6255 return CancelKind; 6256 } 6257 6258 void CGOpenMPRuntime::emitCancellationPointCall( 6259 CodeGenFunction &CGF, SourceLocation Loc, 6260 OpenMPDirectiveKind CancelRegion) { 6261 if (!CGF.HaveInsertPoint()) 6262 return; 6263 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6264 // global_tid, kmp_int32 cncl_kind); 6265 if (auto *OMPRegionInfo = 6266 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6267 // For 'cancellation point taskgroup', the task region info may not have a 6268 // cancel. This may instead happen in another adjacent task. 
6269 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6270 llvm::Value *Args[] = { 6271 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6272 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6273 // Ignore return result until untied tasks are supported. 6274 llvm::Value *Result = CGF.EmitRuntimeCall( 6275 OMPBuilder.getOrCreateRuntimeFunction( 6276 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6277 Args); 6278 // if (__kmpc_cancellationpoint()) { 6279 // exit from construct; 6280 // } 6281 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6282 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6283 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6284 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6285 CGF.EmitBlock(ExitBB); 6286 // exit from construct; 6287 CodeGenFunction::JumpDest CancelDest = 6288 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6289 CGF.EmitBranchThroughCleanup(CancelDest); 6290 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6291 } 6292 } 6293 } 6294 6295 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6296 const Expr *IfCond, 6297 OpenMPDirectiveKind CancelRegion) { 6298 if (!CGF.HaveInsertPoint()) 6299 return; 6300 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6301 // kmp_int32 cncl_kind); 6302 auto &M = CGM.getModule(); 6303 if (auto *OMPRegionInfo = 6304 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6305 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6306 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6307 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6308 llvm::Value *Args[] = { 6309 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6310 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6311 // Ignore return result until untied tasks are supported. 
6312 llvm::Value *Result = CGF.EmitRuntimeCall( 6313 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6314 // if (__kmpc_cancel()) { 6315 // exit from construct; 6316 // } 6317 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6318 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6319 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6320 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6321 CGF.EmitBlock(ExitBB); 6322 // exit from construct; 6323 CodeGenFunction::JumpDest CancelDest = 6324 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6325 CGF.EmitBranchThroughCleanup(CancelDest); 6326 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6327 }; 6328 if (IfCond) { 6329 emitIfClause(CGF, IfCond, ThenGen, 6330 [](CodeGenFunction &, PrePostActionTy &) {}); 6331 } else { 6332 RegionCodeGenTy ThenRCG(ThenGen); 6333 ThenRCG(CGF); 6334 } 6335 } 6336 } 6337 6338 namespace { 6339 /// Cleanup action for uses_allocators support. 
6340 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6341 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6342 6343 public: 6344 OMPUsesAllocatorsActionTy( 6345 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6346 : Allocators(Allocators) {} 6347 void Enter(CodeGenFunction &CGF) override { 6348 if (!CGF.HaveInsertPoint()) 6349 return; 6350 for (const auto &AllocatorData : Allocators) { 6351 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6352 CGF, AllocatorData.first, AllocatorData.second); 6353 } 6354 } 6355 void Exit(CodeGenFunction &CGF) override { 6356 if (!CGF.HaveInsertPoint()) 6357 return; 6358 for (const auto &AllocatorData : Allocators) { 6359 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6360 AllocatorData.first); 6361 } 6362 } 6363 }; 6364 } // namespace 6365 6366 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6367 const OMPExecutableDirective &D, StringRef ParentName, 6368 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6369 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6370 assert(!ParentName.empty() && "Invalid target region parent name!"); 6371 HasEmittedTargetRegion = true; 6372 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6373 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6374 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6375 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6376 if (!D.AllocatorTraits) 6377 continue; 6378 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6379 } 6380 } 6381 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6382 CodeGen.setAction(UsesAllocatorAction); 6383 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6384 IsOffloadEntry, CodeGen); 6385 } 6386 6387 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6388 const Expr *Allocator, 6389 const Expr *AllocatorTraits) { 6390 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6391 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6392 // Use default memspace handle. 6393 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6394 llvm::Value *NumTraits = llvm::ConstantInt::get( 6395 CGF.IntTy, cast<ConstantArrayType>( 6396 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6397 ->getSize() 6398 .getLimitedValue()); 6399 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6400 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6401 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6402 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6403 AllocatorTraitsLVal.getBaseInfo(), 6404 AllocatorTraitsLVal.getTBAAInfo()); 6405 llvm::Value *Traits = 6406 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6407 6408 llvm::Value *AllocatorVal = 6409 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6410 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6411 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6412 // Store to allocator. 
6413 CGF.EmitVarDecl(*cast<VarDecl>( 6414 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6415 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6416 AllocatorVal = 6417 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6418 Allocator->getType(), Allocator->getExprLoc()); 6419 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6420 } 6421 6422 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6423 const Expr *Allocator) { 6424 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6425 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6426 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6427 llvm::Value *AllocatorVal = 6428 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6429 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6430 CGF.getContext().VoidPtrTy, 6431 Allocator->getExprLoc()); 6432 (void)CGF.EmitRuntimeCall( 6433 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6434 OMPRTL___kmpc_destroy_allocator), 6435 {ThreadId, AllocatorVal}); 6436 } 6437 6438 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6439 const OMPExecutableDirective &D, StringRef ParentName, 6440 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6441 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6442 // Create a unique name for the entry function using the source location 6443 // information of the current target region. The name will be something like: 6444 // 6445 // __omp_offloading_DD_FFFF_PP_lBB 6446 // 6447 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6448 // mangled name of the function that encloses the target region and BB is the 6449 // line number of the target region. 
6450 6451 unsigned DeviceID; 6452 unsigned FileID; 6453 unsigned Line; 6454 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6455 Line); 6456 SmallString<64> EntryFnName; 6457 { 6458 llvm::raw_svector_ostream OS(EntryFnName); 6459 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6460 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6461 } 6462 6463 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6464 6465 CodeGenFunction CGF(CGM, true); 6466 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6467 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6468 6469 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6470 6471 // If this target outline function is not an offload entry, we don't need to 6472 // register it. 6473 if (!IsOffloadEntry) 6474 return; 6475 6476 // The target region ID is used by the runtime library to identify the current 6477 // target region, so it only has to be unique and not necessarily point to 6478 // anything. It could be the pointer to the outlined function that implements 6479 // the target region, but we aren't using that so that the compiler doesn't 6480 // need to keep that, and could therefore inline the host function if proven 6481 // worthwhile during optimization. In the other hand, if emitting code for the 6482 // device, the ID has to be the function address so that it can retrieved from 6483 // the offloading entry and launched by the runtime library. We also mark the 6484 // outlined function to have external linkage in case we are emitting code for 6485 // the device, because these functions will be entry points to the device. 
6486 6487 if (CGM.getLangOpts().OpenMPIsDevice) { 6488 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6489 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6490 OutlinedFn->setDSOLocal(false); 6491 if (CGM.getTriple().isAMDGCN()) 6492 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6493 } else { 6494 std::string Name = getName({EntryFnName, "region_id"}); 6495 OutlinedFnID = new llvm::GlobalVariable( 6496 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6497 llvm::GlobalValue::WeakAnyLinkage, 6498 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6499 } 6500 6501 // Register the information for the entry associated with this target region. 6502 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6503 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6504 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6505 } 6506 6507 /// Checks if the expression is constant or does not have non-trivial function 6508 /// calls. 6509 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6510 // We can skip constant expressions. 6511 // We can skip expressions with trivial calls or simple expressions. 6512 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6513 !E->hasNonTrivialCall(Ctx)) && 6514 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6515 } 6516 6517 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6518 const Stmt *Body) { 6519 const Stmt *Child = Body->IgnoreContainers(); 6520 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6521 Child = nullptr; 6522 for (const Stmt *S : C->body()) { 6523 if (const auto *E = dyn_cast<Expr>(S)) { 6524 if (isTrivial(Ctx, E)) 6525 continue; 6526 } 6527 // Some of the statements can be ignored. 6528 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6529 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6530 continue; 6531 // Analyze declarations. 
6532 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6533 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6534 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6535 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6536 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6537 isa<UsingDirectiveDecl>(D) || 6538 isa<OMPDeclareReductionDecl>(D) || 6539 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6540 return true; 6541 const auto *VD = dyn_cast<VarDecl>(D); 6542 if (!VD) 6543 return false; 6544 return VD->isConstexpr() || 6545 ((VD->getType().isTrivialType(Ctx) || 6546 VD->getType()->isReferenceType()) && 6547 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6548 })) 6549 continue; 6550 } 6551 // Found multiple children - cannot get the one child only. 6552 if (Child) 6553 return nullptr; 6554 Child = S; 6555 } 6556 if (Child) 6557 Child = Child->IgnoreContainers(); 6558 } 6559 return Child; 6560 } 6561 6562 /// Emit the number of teams for a target directive. Inspect the num_teams 6563 /// clause associated with a teams construct combined or closely nested 6564 /// with the target directive. 6565 /// 6566 /// Emit a team of size one for directives such as 'target parallel' that 6567 /// have no associated teams construct. 6568 /// 6569 /// Otherwise, return nullptr. 
6570 static llvm::Value * 6571 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6572 const OMPExecutableDirective &D) { 6573 assert(!CGF.getLangOpts().OpenMPIsDevice && 6574 "Clauses associated with the teams directive expected to be emitted " 6575 "only for the host!"); 6576 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6577 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6578 "Expected target-based executable directive."); 6579 CGBuilderTy &Bld = CGF.Builder; 6580 switch (DirectiveKind) { 6581 case OMPD_target: { 6582 const auto *CS = D.getInnermostCapturedStmt(); 6583 const auto *Body = 6584 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6585 const Stmt *ChildStmt = 6586 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6587 if (const auto *NestedDir = 6588 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6589 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6590 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6591 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6592 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6593 const Expr *NumTeams = 6594 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6595 llvm::Value *NumTeamsVal = 6596 CGF.EmitScalarExpr(NumTeams, 6597 /*IgnoreResultAssign*/ true); 6598 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6599 /*isSigned=*/true); 6600 } 6601 return Bld.getInt32(0); 6602 } 6603 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6604 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6605 return Bld.getInt32(1); 6606 return Bld.getInt32(0); 6607 } 6608 return nullptr; 6609 } 6610 case OMPD_target_teams: 6611 case OMPD_target_teams_distribute: 6612 case OMPD_target_teams_distribute_simd: 6613 case OMPD_target_teams_distribute_parallel_for: 6614 case OMPD_target_teams_distribute_parallel_for_simd: { 6615 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6616 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6617 const Expr *NumTeams = 6618 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6619 llvm::Value *NumTeamsVal = 6620 CGF.EmitScalarExpr(NumTeams, 6621 /*IgnoreResultAssign*/ true); 6622 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6623 /*isSigned=*/true); 6624 } 6625 return Bld.getInt32(0); 6626 } 6627 case OMPD_target_parallel: 6628 case OMPD_target_parallel_for: 6629 case OMPD_target_parallel_for_simd: 6630 case OMPD_target_simd: 6631 return Bld.getInt32(1); 6632 case OMPD_parallel: 6633 case OMPD_for: 6634 case OMPD_parallel_for: 6635 case OMPD_parallel_master: 6636 case OMPD_parallel_sections: 6637 case OMPD_for_simd: 6638 case OMPD_parallel_for_simd: 6639 case OMPD_cancel: 6640 case OMPD_cancellation_point: 6641 case OMPD_ordered: 6642 case OMPD_threadprivate: 6643 case OMPD_allocate: 6644 case OMPD_task: 6645 case OMPD_simd: 6646 case OMPD_tile: 6647 case OMPD_sections: 6648 case OMPD_section: 6649 case OMPD_single: 6650 case OMPD_master: 6651 case OMPD_critical: 6652 case OMPD_taskyield: 6653 case OMPD_barrier: 6654 case OMPD_taskwait: 6655 case OMPD_taskgroup: 6656 case OMPD_atomic: 6657 case OMPD_flush: 6658 case OMPD_depobj: 6659 case OMPD_scan: 6660 case OMPD_teams: 6661 case OMPD_target_data: 6662 case OMPD_target_exit_data: 6663 case OMPD_target_enter_data: 6664 case OMPD_distribute: 6665 case OMPD_distribute_simd: 6666 case OMPD_distribute_parallel_for: 6667 case OMPD_distribute_parallel_for_simd: 6668 case OMPD_teams_distribute: 6669 case OMPD_teams_distribute_simd: 6670 case OMPD_teams_distribute_parallel_for: 6671 case OMPD_teams_distribute_parallel_for_simd: 6672 case OMPD_target_update: 6673 case OMPD_declare_simd: 6674 case OMPD_declare_variant: 6675 case OMPD_begin_declare_variant: 6676 case OMPD_end_declare_variant: 6677 case OMPD_declare_target: 6678 case OMPD_end_declare_target: 6679 case OMPD_declare_reduction: 6680 case OMPD_declare_mapper: 6681 case OMPD_taskloop: 6682 case OMPD_taskloop_simd: 6683 case 
OMPD_master_taskloop: 6684 case OMPD_master_taskloop_simd: 6685 case OMPD_parallel_master_taskloop: 6686 case OMPD_parallel_master_taskloop_simd: 6687 case OMPD_requires: 6688 case OMPD_unknown: 6689 break; 6690 default: 6691 break; 6692 } 6693 llvm_unreachable("Unexpected directive kind."); 6694 } 6695 6696 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6697 llvm::Value *DefaultThreadLimitVal) { 6698 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6699 CGF.getContext(), CS->getCapturedStmt()); 6700 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6701 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6702 llvm::Value *NumThreads = nullptr; 6703 llvm::Value *CondVal = nullptr; 6704 // Handle if clause. If if clause present, the number of threads is 6705 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6706 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6707 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6708 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6709 const OMPIfClause *IfClause = nullptr; 6710 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6711 if (C->getNameModifier() == OMPD_unknown || 6712 C->getNameModifier() == OMPD_parallel) { 6713 IfClause = C; 6714 break; 6715 } 6716 } 6717 if (IfClause) { 6718 const Expr *Cond = IfClause->getCondition(); 6719 bool Result; 6720 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6721 if (!Result) 6722 return CGF.Builder.getInt32(1); 6723 } else { 6724 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6725 if (const auto *PreInit = 6726 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6727 for (const auto *I : PreInit->decls()) { 6728 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6729 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6730 } else { 6731 CodeGenFunction::AutoVarEmission Emission = 6732 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6733 CGF.EmitAutoVarCleanups(Emission); 6734 
} 6735 } 6736 } 6737 CondVal = CGF.EvaluateExprAsBool(Cond); 6738 } 6739 } 6740 } 6741 // Check the value of num_threads clause iff if clause was not specified 6742 // or is not evaluated to false. 6743 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6744 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6745 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6746 const auto *NumThreadsClause = 6747 Dir->getSingleClause<OMPNumThreadsClause>(); 6748 CodeGenFunction::LexicalScope Scope( 6749 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6750 if (const auto *PreInit = 6751 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6752 for (const auto *I : PreInit->decls()) { 6753 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6754 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6755 } else { 6756 CodeGenFunction::AutoVarEmission Emission = 6757 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6758 CGF.EmitAutoVarCleanups(Emission); 6759 } 6760 } 6761 } 6762 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6763 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6764 /*isSigned=*/false); 6765 if (DefaultThreadLimitVal) 6766 NumThreads = CGF.Builder.CreateSelect( 6767 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6768 DefaultThreadLimitVal, NumThreads); 6769 } else { 6770 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6771 : CGF.Builder.getInt32(0); 6772 } 6773 // Process condition of the if clause. 6774 if (CondVal) { 6775 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6776 CGF.Builder.getInt32(1)); 6777 } 6778 return NumThreads; 6779 } 6780 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6781 return CGF.Builder.getInt32(1); 6782 return DefaultThreadLimitVal; 6783 } 6784 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6785 : CGF.Builder.getInt32(0); 6786 } 6787 6788 /// Emit the number of threads for a target directive. 
Inspect the 6789 /// thread_limit clause associated with a teams construct combined or closely 6790 /// nested with the target directive. 6791 /// 6792 /// Emit the num_threads clause for directives such as 'target parallel' that 6793 /// have no associated teams construct. 6794 /// 6795 /// Otherwise, return nullptr. 6796 static llvm::Value * 6797 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6798 const OMPExecutableDirective &D) { 6799 assert(!CGF.getLangOpts().OpenMPIsDevice && 6800 "Clauses associated with the teams directive expected to be emitted " 6801 "only for the host!"); 6802 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6803 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6804 "Expected target-based executable directive."); 6805 CGBuilderTy &Bld = CGF.Builder; 6806 llvm::Value *ThreadLimitVal = nullptr; 6807 llvm::Value *NumThreadsVal = nullptr; 6808 switch (DirectiveKind) { 6809 case OMPD_target: { 6810 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6811 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6812 return NumThreads; 6813 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6814 CGF.getContext(), CS->getCapturedStmt()); 6815 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6816 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6817 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6818 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6819 const auto *ThreadLimitClause = 6820 Dir->getSingleClause<OMPThreadLimitClause>(); 6821 CodeGenFunction::LexicalScope Scope( 6822 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6823 if (const auto *PreInit = 6824 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6825 for (const auto *I : PreInit->decls()) { 6826 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6827 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6828 } else { 6829 CodeGenFunction::AutoVarEmission Emission = 6830 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6831 CGF.EmitAutoVarCleanups(Emission); 6832 } 6833 } 6834 } 6835 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6836 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6837 ThreadLimitVal = 6838 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6839 } 6840 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6841 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6842 CS = Dir->getInnermostCapturedStmt(); 6843 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6844 CGF.getContext(), CS->getCapturedStmt()); 6845 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6846 } 6847 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6848 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6849 CS = Dir->getInnermostCapturedStmt(); 6850 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6851 return NumThreads; 6852 } 6853 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6854 return Bld.getInt32(1); 6855 } 6856 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6857 } 6858 case OMPD_target_teams: { 6859 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6860 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6861 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6862 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6863 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6864 ThreadLimitVal = 6865 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6866 } 6867 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6868 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6869 return NumThreads; 6870 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6871 CGF.getContext(), CS->getCapturedStmt()); 6872 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6873 if (Dir->getDirectiveKind() == OMPD_distribute) { 6874 CS = Dir->getInnermostCapturedStmt(); 6875 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6876 return NumThreads; 6877 } 6878 } 6879 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6880 } 6881 case OMPD_target_teams_distribute: 6882 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6883 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6884 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6885 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6886 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6887 ThreadLimitVal = 6888 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6889 } 6890 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6891 case OMPD_target_parallel: 6892 case OMPD_target_parallel_for: 6893 case OMPD_target_parallel_for_simd: 6894 case OMPD_target_teams_distribute_parallel_for: 6895 case OMPD_target_teams_distribute_parallel_for_simd: { 6896 llvm::Value *CondVal = nullptr; 6897 // Handle if clause. 
If if clause present, the number of threads is 6898 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6899 if (D.hasClausesOfKind<OMPIfClause>()) { 6900 const OMPIfClause *IfClause = nullptr; 6901 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6902 if (C->getNameModifier() == OMPD_unknown || 6903 C->getNameModifier() == OMPD_parallel) { 6904 IfClause = C; 6905 break; 6906 } 6907 } 6908 if (IfClause) { 6909 const Expr *Cond = IfClause->getCondition(); 6910 bool Result; 6911 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6912 if (!Result) 6913 return Bld.getInt32(1); 6914 } else { 6915 CodeGenFunction::RunCleanupsScope Scope(CGF); 6916 CondVal = CGF.EvaluateExprAsBool(Cond); 6917 } 6918 } 6919 } 6920 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6921 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6922 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6923 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6924 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6925 ThreadLimitVal = 6926 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6927 } 6928 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6929 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6930 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6931 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6932 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6933 NumThreadsVal = 6934 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6935 ThreadLimitVal = ThreadLimitVal 6936 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6937 ThreadLimitVal), 6938 NumThreadsVal, ThreadLimitVal) 6939 : NumThreadsVal; 6940 } 6941 if (!ThreadLimitVal) 6942 ThreadLimitVal = Bld.getInt32(0); 6943 if (CondVal) 6944 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6945 return ThreadLimitVal; 6946 } 6947 case OMPD_target_teams_distribute_simd: 6948 case OMPD_target_simd: 6949 return Bld.getInt32(1); 6950 case OMPD_parallel: 6951 case OMPD_for: 6952 case OMPD_parallel_for: 6953 case OMPD_parallel_master: 6954 case OMPD_parallel_sections: 6955 case OMPD_for_simd: 6956 case OMPD_parallel_for_simd: 6957 case OMPD_cancel: 6958 case OMPD_cancellation_point: 6959 case OMPD_ordered: 6960 case OMPD_threadprivate: 6961 case OMPD_allocate: 6962 case OMPD_task: 6963 case OMPD_simd: 6964 case OMPD_tile: 6965 case OMPD_sections: 6966 case OMPD_section: 6967 case OMPD_single: 6968 case OMPD_master: 6969 case OMPD_critical: 6970 case OMPD_taskyield: 6971 case OMPD_barrier: 6972 case OMPD_taskwait: 6973 case OMPD_taskgroup: 6974 case OMPD_atomic: 6975 case OMPD_flush: 6976 case OMPD_depobj: 6977 case OMPD_scan: 6978 case OMPD_teams: 6979 case OMPD_target_data: 6980 case OMPD_target_exit_data: 6981 case OMPD_target_enter_data: 6982 case OMPD_distribute: 6983 case OMPD_distribute_simd: 6984 case OMPD_distribute_parallel_for: 6985 case OMPD_distribute_parallel_for_simd: 6986 case OMPD_teams_distribute: 6987 case OMPD_teams_distribute_simd: 6988 case OMPD_teams_distribute_parallel_for: 6989 case OMPD_teams_distribute_parallel_for_simd: 6990 case OMPD_target_update: 6991 case OMPD_declare_simd: 6992 case OMPD_declare_variant: 6993 case OMPD_begin_declare_variant: 6994 case OMPD_end_declare_variant: 6995 case OMPD_declare_target: 6996 case OMPD_end_declare_target: 6997 case OMPD_declare_reduction: 6998 case OMPD_declare_mapper: 6999 case OMPD_taskloop: 7000 case OMPD_taskloop_simd: 7001 case OMPD_master_taskloop: 7002 case OMPD_master_taskloop_simd: 7003 case 
OMPD_parallel_master_taskloop: 7004 case OMPD_parallel_master_taskloop_simd: 7005 case OMPD_requires: 7006 case OMPD_unknown: 7007 break; 7008 default: 7009 break; 7010 } 7011 llvm_unreachable("Unsupported directive kind."); 7012 } 7013 7014 namespace { 7015 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7016 7017 // Utility to handle information from clauses associated with a given 7018 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7019 // It provides a convenient interface to obtain the information and generate 7020 // code for that information. 7021 class MappableExprsHandler { 7022 public: 7023 /// Values for bit flags used to specify the mapping type for 7024 /// offloading. 7025 enum OpenMPOffloadMappingFlags : uint64_t { 7026 /// No flags 7027 OMP_MAP_NONE = 0x0, 7028 /// Allocate memory on the device and move data from host to device. 7029 OMP_MAP_TO = 0x01, 7030 /// Allocate memory on the device and move data from device to host. 7031 OMP_MAP_FROM = 0x02, 7032 /// Always perform the requested mapping action on the element, even 7033 /// if it was already mapped before. 7034 OMP_MAP_ALWAYS = 0x04, 7035 /// Delete the element from the device environment, ignoring the 7036 /// current reference count associated with the element. 7037 OMP_MAP_DELETE = 0x08, 7038 /// The element being mapped is a pointer-pointee pair; both the 7039 /// pointer and the pointee should be mapped. 7040 OMP_MAP_PTR_AND_OBJ = 0x10, 7041 /// This flags signals that the base address of an entry should be 7042 /// passed to the target kernel as an argument. 7043 OMP_MAP_TARGET_PARAM = 0x20, 7044 /// Signal that the runtime library has to return the device pointer 7045 /// in the current position for the data being mapped. Used when we have the 7046 /// use_device_ptr or use_device_addr clause. 7047 OMP_MAP_RETURN_PARAM = 0x40, 7048 /// This flag signals that the reference being passed is a pointer to 7049 /// private data. 
7050 OMP_MAP_PRIVATE = 0x80, 7051 /// Pass the element to the device by value. 7052 OMP_MAP_LITERAL = 0x100, 7053 /// Implicit map 7054 OMP_MAP_IMPLICIT = 0x200, 7055 /// Close is a hint to the runtime to allocate memory close to 7056 /// the target device. 7057 OMP_MAP_CLOSE = 0x400, 7058 /// 0x800 is reserved for compatibility with XLC. 7059 /// Produce a runtime error if the data is not already allocated. 7060 OMP_MAP_PRESENT = 0x1000, 7061 /// Signal that the runtime library should use args as an array of 7062 /// descriptor_dim pointers and use args_size as dims. Used when we have 7063 /// non-contiguous list items in target update directive 7064 OMP_MAP_NON_CONTIG = 0x100000000000, 7065 /// The 16 MSBs of the flags indicate whether the entry is member of some 7066 /// struct/class. 7067 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7068 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7069 }; 7070 7071 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7072 static unsigned getFlagMemberOffset() { 7073 unsigned Offset = 0; 7074 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7075 Remain = Remain >> 1) 7076 Offset++; 7077 return Offset; 7078 } 7079 7080 /// Class that holds debugging information for a data mapping to be passed to 7081 /// the runtime library. 7082 class MappingExprInfo { 7083 /// The variable declaration used for the data mapping. 7084 const ValueDecl *MapDecl = nullptr; 7085 /// The original expression used in the map clause, or null if there is 7086 /// none. 7087 const Expr *MapExpr = nullptr; 7088 7089 public: 7090 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) 7091 : MapDecl(MapDecl), MapExpr(MapExpr) {} 7092 7093 const ValueDecl *getMapDecl() const { return MapDecl; } 7094 const Expr *getMapExpr() const { return MapExpr; } 7095 }; 7096 7097 /// Class that associates information with a base pointer to be passed to the 7098 /// runtime library. 
7099 class BasePointerInfo { 7100 /// The base pointer. 7101 llvm::Value *Ptr = nullptr; 7102 /// The base declaration that refers to this device pointer, or null if 7103 /// there is none. 7104 const ValueDecl *DevPtrDecl = nullptr; 7105 7106 public: 7107 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7108 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7109 llvm::Value *operator*() const { return Ptr; } 7110 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7111 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7112 }; 7113 7114 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; 7115 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7116 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7117 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7118 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7119 using MapDimArrayTy = SmallVector<uint64_t, 4>; 7120 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; 7121 7122 /// This structure contains combined information generated for mappable 7123 /// clauses, including base pointers, pointers, sizes, map types, user-defined 7124 /// mappers, and non-contiguous information. 7125 struct MapCombinedInfoTy { 7126 struct StructNonContiguousInfo { 7127 bool IsNonContiguous = false; 7128 MapDimArrayTy Dims; 7129 MapNonContiguousArrayTy Offsets; 7130 MapNonContiguousArrayTy Counts; 7131 MapNonContiguousArrayTy Strides; 7132 }; 7133 MapExprsArrayTy Exprs; 7134 MapBaseValuesArrayTy BasePointers; 7135 MapValuesArrayTy Pointers; 7136 MapValuesArrayTy Sizes; 7137 MapFlagsArrayTy Types; 7138 MapMappersArrayTy Mappers; 7139 StructNonContiguousInfo NonContigInfo; 7140 7141 /// Append arrays in \a CurInfo. 
7142 void append(MapCombinedInfoTy &CurInfo) { 7143 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7144 BasePointers.append(CurInfo.BasePointers.begin(), 7145 CurInfo.BasePointers.end()); 7146 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7147 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7148 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7149 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7150 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7151 CurInfo.NonContigInfo.Dims.end()); 7152 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7153 CurInfo.NonContigInfo.Offsets.end()); 7154 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7155 CurInfo.NonContigInfo.Counts.end()); 7156 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7157 CurInfo.NonContigInfo.Strides.end()); 7158 } 7159 }; 7160 7161 /// Map between a struct and the its lowest & highest elements which have been 7162 /// mapped. 7163 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7164 /// HE(FieldIndex, Pointer)} 7165 struct StructRangeInfoTy { 7166 MapCombinedInfoTy PreliminaryMapData; 7167 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7168 0, Address::invalid()}; 7169 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7170 0, Address::invalid()}; 7171 Address Base = Address::invalid(); 7172 Address LB = Address::invalid(); 7173 bool IsArraySection = false; 7174 bool HasCompleteRecord = false; 7175 }; 7176 7177 private: 7178 /// Kind that defines how a device pointer has to be returned. 
7179 struct MapInfo { 7180 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7181 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7182 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7183 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7184 bool ReturnDevicePointer = false; 7185 bool IsImplicit = false; 7186 const ValueDecl *Mapper = nullptr; 7187 const Expr *VarRef = nullptr; 7188 bool ForDeviceAddr = false; 7189 7190 MapInfo() = default; 7191 MapInfo( 7192 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7193 OpenMPMapClauseKind MapType, 7194 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7195 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7196 bool ReturnDevicePointer, bool IsImplicit, 7197 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr, 7198 bool ForDeviceAddr = false) 7199 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7200 MotionModifiers(MotionModifiers), 7201 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7202 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} 7203 }; 7204 7205 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7206 /// member and there is no map information about it, then emission of that 7207 /// entry is deferred until the whole struct has been processed. 7208 struct DeferredDevicePtrEntryTy { 7209 const Expr *IE = nullptr; 7210 const ValueDecl *VD = nullptr; 7211 bool ForDeviceAddr = false; 7212 7213 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7214 bool ForDeviceAddr) 7215 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7216 }; 7217 7218 /// The target directive from where the mappable clauses were extracted. It 7219 /// is either a executable directive or a user-defined mapper directive. 7220 llvm::PointerUnion<const OMPExecutableDirective *, 7221 const OMPDeclareMapperDecl *> 7222 CurDir; 7223 7224 /// Function the directive is being generated for. 
7225 CodeGenFunction &CGF; 7226 7227 /// Set of all first private variables in the current directive. 7228 /// bool data is set to true if the variable is implicitly marked as 7229 /// firstprivate, false otherwise. 7230 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7231 7232 /// Map between device pointer declarations and their expression components. 7233 /// The key value for declarations in 'this' is null. 7234 llvm::DenseMap< 7235 const ValueDecl *, 7236 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7237 DevPointersMap; 7238 7239 llvm::Value *getExprTypeSize(const Expr *E) const { 7240 QualType ExprTy = E->getType().getCanonicalType(); 7241 7242 // Calculate the size for array shaping expression. 7243 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7244 llvm::Value *Size = 7245 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7246 for (const Expr *SE : OAE->getDimensions()) { 7247 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7248 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7249 CGF.getContext().getSizeType(), 7250 SE->getExprLoc()); 7251 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7252 } 7253 return Size; 7254 } 7255 7256 // Reference types are ignored for mapping purposes. 7257 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7258 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7259 7260 // Given that an array section is considered a built-in type, we need to 7261 // do the calculation based on the length of the section instead of relying 7262 // on CGF.getTypeSize(E->getType()). 7263 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7264 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7265 OAE->getBase()->IgnoreParenImpCasts()) 7266 .getCanonicalType(); 7267 7268 // If there is no length associated with the expression and lower bound is 7269 // not specified too, that means we are using the whole length of the 7270 // base. 
7271 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7272 !OAE->getLowerBound()) 7273 return CGF.getTypeSize(BaseTy); 7274 7275 llvm::Value *ElemSize; 7276 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7277 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7278 } else { 7279 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7280 assert(ATy && "Expecting array type if not a pointer type."); 7281 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7282 } 7283 7284 // If we don't have a length at this point, that is because we have an 7285 // array section with a single element. 7286 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7287 return ElemSize; 7288 7289 if (const Expr *LenExpr = OAE->getLength()) { 7290 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7291 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7292 CGF.getContext().getSizeType(), 7293 LenExpr->getExprLoc()); 7294 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7295 } 7296 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7297 OAE->getLowerBound() && "expected array_section[lb:]."); 7298 // Size = sizetype - lb * elemtype; 7299 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7300 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7301 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7302 CGF.getContext().getSizeType(), 7303 OAE->getLowerBound()->getExprLoc()); 7304 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7305 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7306 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7307 LengthVal = CGF.Builder.CreateSelect( 7308 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7309 return LengthVal; 7310 } 7311 return CGF.getTypeSize(ExprTy); 7312 } 7313 7314 /// Return the corresponding bits for a given map clause modifier. 
Add 7315 /// a flag marking the map as a pointer if requested. Add a flag marking the 7316 /// map as the first one of a series of maps that relate to the same map 7317 /// expression. 7318 OpenMPOffloadMappingFlags getMapTypeBits( 7319 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7320 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7321 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7322 OpenMPOffloadMappingFlags Bits = 7323 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7324 switch (MapType) { 7325 case OMPC_MAP_alloc: 7326 case OMPC_MAP_release: 7327 // alloc and release is the default behavior in the runtime library, i.e. 7328 // if we don't pass any bits alloc/release that is what the runtime is 7329 // going to do. Therefore, we don't need to signal anything for these two 7330 // type modifiers. 7331 break; 7332 case OMPC_MAP_to: 7333 Bits |= OMP_MAP_TO; 7334 break; 7335 case OMPC_MAP_from: 7336 Bits |= OMP_MAP_FROM; 7337 break; 7338 case OMPC_MAP_tofrom: 7339 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7340 break; 7341 case OMPC_MAP_delete: 7342 Bits |= OMP_MAP_DELETE; 7343 break; 7344 case OMPC_MAP_unknown: 7345 llvm_unreachable("Unexpected map type!"); 7346 } 7347 if (AddPtrFlag) 7348 Bits |= OMP_MAP_PTR_AND_OBJ; 7349 if (AddIsTargetParamFlag) 7350 Bits |= OMP_MAP_TARGET_PARAM; 7351 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7352 != MapModifiers.end()) 7353 Bits |= OMP_MAP_ALWAYS; 7354 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7355 != MapModifiers.end()) 7356 Bits |= OMP_MAP_CLOSE; 7357 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7358 MapModifiers.end() || 7359 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7360 MotionModifiers.end()) 7361 Bits |= OMP_MAP_PRESENT; 7362 if (IsNonContiguous) 7363 Bits |= OMP_MAP_NON_CONTIG; 7364 return Bits; 7365 } 7366 7367 /// Return true if the provided expression is a final array section. 
A 7368 /// final array section, is one whose length can't be proved to be one. 7369 bool isFinalArraySectionExpression(const Expr *E) const { 7370 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7371 7372 // It is not an array section and therefore not a unity-size one. 7373 if (!OASE) 7374 return false; 7375 7376 // An array section with no colon always refer to a single element. 7377 if (OASE->getColonLocFirst().isInvalid()) 7378 return false; 7379 7380 const Expr *Length = OASE->getLength(); 7381 7382 // If we don't have a length we have to check if the array has size 1 7383 // for this dimension. Also, we should always expect a length if the 7384 // base type is pointer. 7385 if (!Length) { 7386 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7387 OASE->getBase()->IgnoreParenImpCasts()) 7388 .getCanonicalType(); 7389 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7390 return ATy->getSize().getSExtValue() != 1; 7391 // If we don't have a constant dimension length, we have to consider 7392 // the current section as having any size, so it is not necessarily 7393 // unitary. If it happen to be unity size, that's user fault. 7394 return true; 7395 } 7396 7397 // Check if the length evaluates to 1. 7398 Expr::EvalResult Result; 7399 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7400 return true; // Can have more that size 1. 7401 7402 llvm::APSInt ConstLength = Result.Val.getInt(); 7403 return ConstLength.getSExtValue() != 1; 7404 } 7405 7406 /// Generate the base pointers, section pointers, sizes, map type bits, and 7407 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7408 /// map type, map or motion modifiers, and expression components. 7409 /// \a IsFirstComponent should be set to true if the provided set of 7410 /// components is the first associated with a capture. 
7411 void generateInfoForComponentList( 7412 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7413 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7414 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7415 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7416 bool IsFirstComponentList, bool IsImplicit, 7417 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7418 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7419 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7420 OverlappedElements = llvm::None) const { 7421 // The following summarizes what has to be generated for each map and the 7422 // types below. The generated information is expressed in this order: 7423 // base pointer, section pointer, size, flags 7424 // (to add to the ones that come from the map type and modifier). 7425 // 7426 // double d; 7427 // int i[100]; 7428 // float *p; 7429 // 7430 // struct S1 { 7431 // int i; 7432 // float f[50]; 7433 // } 7434 // struct S2 { 7435 // int i; 7436 // float f[50]; 7437 // S1 s; 7438 // double *p; 7439 // struct S2 *ps; 7440 // } 7441 // S2 s; 7442 // S2 *ps; 7443 // 7444 // map(d) 7445 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7446 // 7447 // map(i) 7448 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7449 // 7450 // map(i[1:23]) 7451 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7452 // 7453 // map(p) 7454 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7455 // 7456 // map(p[1:24]) 7457 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7458 // in unified shared memory mode or for local pointers 7459 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7460 // 7461 // map(s) 7462 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7463 // 7464 // map(s.i) 7465 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7466 // 7467 // map(s.s.f) 7468 // &s, &(s.s.f[0]), 50*sizeof(float), 
TARGET_PARAM | TO | FROM 7469 // 7470 // map(s.p) 7471 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7472 // 7473 // map(to: s.p[:22]) 7474 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7475 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7476 // &(s.p), &(s.p[0]), 22*sizeof(double), 7477 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7478 // (*) alloc space for struct members, only this is a target parameter 7479 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7480 // optimizes this entry out, same in the examples below) 7481 // (***) map the pointee (map: to) 7482 // 7483 // map(s.ps) 7484 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7485 // 7486 // map(from: s.ps->s.i) 7487 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7488 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7489 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7490 // 7491 // map(to: s.ps->ps) 7492 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7493 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7494 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7495 // 7496 // map(s.ps->ps->ps) 7497 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7498 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7499 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7500 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7501 // 7502 // map(to: s.ps->ps->s.f[:22]) 7503 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7504 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7505 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7506 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7507 // 7508 // map(ps) 7509 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7510 // 7511 // map(ps->i) 7512 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7513 // 7514 // map(ps->s.f) 7515 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7516 // 7517 // map(from: ps->p) 7518 // ps, &(ps->p), sizeof(double*), 
TARGET_PARAM | FROM 7519 // 7520 // map(to: ps->p[:22]) 7521 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7522 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7523 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7524 // 7525 // map(ps->ps) 7526 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7527 // 7528 // map(from: ps->ps->s.i) 7529 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7530 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7531 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7532 // 7533 // map(from: ps->ps->ps) 7534 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7535 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7536 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7537 // 7538 // map(ps->ps->ps->ps) 7539 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7540 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7541 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7542 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7543 // 7544 // map(to: ps->ps->ps->s.f[:22]) 7545 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7546 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7547 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7548 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7549 // 7550 // map(to: s.f[:22]) map(from: s.p[:33]) 7551 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7552 // sizeof(double*) (**), TARGET_PARAM 7553 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7554 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7555 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7556 // (*) allocate contiguous space needed to fit all mapped members even if 7557 // we allocate space for members not mapped (in this example, 7558 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7559 // them as well because they fall between &s.f[0] and &s.p) 7560 // 7561 
// map(from: s.f[:22]) map(to: ps->p[:33]) 7562 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7563 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7564 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7565 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7566 // (*) the struct this entry pertains to is the 2nd element in the list of 7567 // arguments, hence MEMBER_OF(2) 7568 // 7569 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7570 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7571 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7572 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7573 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7574 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7575 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7576 // (*) the struct this entry pertains to is the 4th element in the list 7577 // of arguments, hence MEMBER_OF(4) 7578 7579 // Track if the map information being generated is the first for a capture. 7580 bool IsCaptureFirstInfo = IsFirstComponentList; 7581 // When the variable is on a declare target link or in a to clause with 7582 // unified memory, a reference is needed to hold the host/device address 7583 // of the variable. 7584 bool RequiresReference = false; 7585 7586 // Scan the components from the base to the complete expression. 7587 auto CI = Components.rbegin(); 7588 auto CE = Components.rend(); 7589 auto I = CI; 7590 7591 // Track if the map information being generated is the first for a list of 7592 // components. 
7593 bool IsExpressionFirstInfo = true; 7594 bool FirstPointerInComplexData = false; 7595 Address BP = Address::invalid(); 7596 const Expr *AssocExpr = I->getAssociatedExpression(); 7597 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7598 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7599 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7600 7601 if (isa<MemberExpr>(AssocExpr)) { 7602 // The base is the 'this' pointer. The content of the pointer is going 7603 // to be the base of the field being mapped. 7604 BP = CGF.LoadCXXThisAddress(); 7605 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7606 (OASE && 7607 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7608 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7609 } else if (OAShE && 7610 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7611 BP = Address( 7612 CGF.EmitScalarExpr(OAShE->getBase()), 7613 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7614 } else { 7615 // The base is the reference to the variable. 7616 // BP = &Var. 7617 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7618 if (const auto *VD = 7619 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7620 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7621 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7622 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7623 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7624 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7625 RequiresReference = true; 7626 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7627 } 7628 } 7629 } 7630 7631 // If the variable is a pointer and is being dereferenced (i.e. is not 7632 // the last component), the base has to be the pointer itself, not its 7633 // reference. References are ignored for mapping purposes. 
7634 QualType Ty = 7635 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7636 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7637 // No need to generate individual map information for the pointer, it 7638 // can be associated with the combined storage if shared memory mode is 7639 // active or the base declaration is not global variable. 7640 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7641 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7642 !VD || VD->hasLocalStorage()) 7643 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7644 else 7645 FirstPointerInComplexData = true; 7646 ++I; 7647 } 7648 } 7649 7650 // Track whether a component of the list should be marked as MEMBER_OF some 7651 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7652 // in a component list should be marked as MEMBER_OF, all subsequent entries 7653 // do not belong to the base struct. E.g. 7654 // struct S2 s; 7655 // s.ps->ps->ps->f[:] 7656 // (1) (2) (3) (4) 7657 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7658 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7659 // is the pointee of ps(2) which is not member of struct s, so it should not 7660 // be marked as such (it is still PTR_AND_OBJ). 7661 // The variable is initialized to false so that PTR_AND_OBJ entries which 7662 // are not struct members are not considered (e.g. array of pointers to 7663 // data). 7664 bool ShouldBeMemberOf = false; 7665 7666 // Variable keeping track of whether or not we have encountered a component 7667 // in the component list which is a member expression. Useful when we have a 7668 // pointer or a final array section, in which case it is the previous 7669 // component in the list which tells us whether we have a member expression. 7670 // E.g. 
X.f[:] 7671 // While processing the final array section "[:]" it is "f" which tells us 7672 // whether we are dealing with a member of a declared struct. 7673 const MemberExpr *EncounteredME = nullptr; 7674 7675 // Track for the total number of dimension. Start from one for the dummy 7676 // dimension. 7677 uint64_t DimSize = 1; 7678 7679 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7680 7681 for (; I != CE; ++I) { 7682 // If the current component is member of a struct (parent struct) mark it. 7683 if (!EncounteredME) { 7684 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7685 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7686 // as MEMBER_OF the parent struct. 7687 if (EncounteredME) { 7688 ShouldBeMemberOf = true; 7689 // Do not emit as complex pointer if this is actually not array-like 7690 // expression. 7691 if (FirstPointerInComplexData) { 7692 QualType Ty = std::prev(I) 7693 ->getAssociatedDeclaration() 7694 ->getType() 7695 .getNonReferenceType(); 7696 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7697 FirstPointerInComplexData = false; 7698 } 7699 } 7700 } 7701 7702 auto Next = std::next(I); 7703 7704 // We need to generate the addresses and sizes if this is the last 7705 // component, if the component is a pointer or if it is an array section 7706 // whose length can't be proved to be one. If this is a pointer, it 7707 // becomes the base address for the following components. 7708 7709 // A final array section, is one whose length can't be proved to be one. 7710 // If the map item is non-contiguous then we don't treat any array section 7711 // as final array section. 7712 bool IsFinalArraySection = 7713 !IsNonContiguous && 7714 isFinalArraySectionExpression(I->getAssociatedExpression()); 7715 7716 // If we have a declaration for the mapping use that, otherwise use 7717 // the base declaration of the map clause. 
7718 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7719 ? I->getAssociatedDeclaration() 7720 : BaseDecl; 7721 7722 // Get information on whether the element is a pointer. Have to do a 7723 // special treatment for array sections given that they are built-in 7724 // types. 7725 const auto *OASE = 7726 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7727 const auto *OAShE = 7728 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7729 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7730 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7731 bool IsPointer = 7732 OAShE || 7733 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7734 .getCanonicalType() 7735 ->isAnyPointerType()) || 7736 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7737 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7738 7739 if (OASE) 7740 ++DimSize; 7741 7742 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7743 // If this is not the last component, we expect the pointer to be 7744 // associated with an array expression or member expression. 
7745 assert((Next == CE || 7746 isa<MemberExpr>(Next->getAssociatedExpression()) || 7747 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7748 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7749 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7750 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7751 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7752 "Unexpected expression"); 7753 7754 Address LB = Address::invalid(); 7755 if (OAShE) { 7756 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7757 CGF.getContext().getTypeAlignInChars( 7758 OAShE->getBase()->getType())); 7759 } else { 7760 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7761 .getAddress(CGF); 7762 } 7763 7764 // If this component is a pointer inside the base struct then we don't 7765 // need to create any entry for it - it will be combined with the object 7766 // it is pointing to into a single PTR_AND_OBJ entry. 7767 bool IsMemberPointerOrAddr = 7768 (IsPointer || ForDeviceAddr) && EncounteredME && 7769 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7770 EncounteredME); 7771 if (!OverlappedElements.empty() && Next == CE) { 7772 // Handle base element with the info for overlapped elements. 7773 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7774 assert(!IsPointer && 7775 "Unexpected base element with the pointer type."); 7776 // Mark the whole struct as the struct that requires allocation on the 7777 // device. 
7778 PartialStruct.LowestElem = {0, LB}; 7779 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7780 I->getAssociatedExpression()->getType()); 7781 Address HB = CGF.Builder.CreateConstGEP( 7782 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7783 CGF.VoidPtrTy), 7784 TypeSize.getQuantity() - 1); 7785 PartialStruct.HighestElem = { 7786 std::numeric_limits<decltype( 7787 PartialStruct.HighestElem.first)>::max(), 7788 HB}; 7789 PartialStruct.Base = BP; 7790 PartialStruct.LB = LB; 7791 assert( 7792 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7793 "Overlapped elements must be used only once for the variable."); 7794 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7795 // Emit data for non-overlapped data. 7796 OpenMPOffloadMappingFlags Flags = 7797 OMP_MAP_MEMBER_OF | 7798 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7799 /*AddPtrFlag=*/false, 7800 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7801 llvm::Value *Size = nullptr; 7802 // Do bitcopy of all non-overlapped structure elements. 
7803 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7804 Component : OverlappedElements) { 7805 Address ComponentLB = Address::invalid(); 7806 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7807 Component) { 7808 if (MC.getAssociatedDeclaration()) { 7809 ComponentLB = 7810 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7811 .getAddress(CGF); 7812 Size = CGF.Builder.CreatePtrDiff( 7813 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7814 CGF.EmitCastToVoidPtr(LB.getPointer())); 7815 break; 7816 } 7817 } 7818 assert(Size && "Failed to determine structure size"); 7819 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7820 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7821 CombinedInfo.Pointers.push_back(LB.getPointer()); 7822 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7823 Size, CGF.Int64Ty, /*isSigned=*/true)); 7824 CombinedInfo.Types.push_back(Flags); 7825 CombinedInfo.Mappers.push_back(nullptr); 7826 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7827 : 1); 7828 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7829 } 7830 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7831 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7832 CombinedInfo.Pointers.push_back(LB.getPointer()); 7833 Size = CGF.Builder.CreatePtrDiff( 7834 CGF.EmitCastToVoidPtr( 7835 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7836 CGF.EmitCastToVoidPtr(LB.getPointer())); 7837 CombinedInfo.Sizes.push_back( 7838 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7839 CombinedInfo.Types.push_back(Flags); 7840 CombinedInfo.Mappers.push_back(nullptr); 7841 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7842 : 1); 7843 break; 7844 } 7845 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7846 if (!IsMemberPointerOrAddr || 7847 (Next == CE && MapType != OMPC_MAP_unknown)) { 7848 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7849 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7850 CombinedInfo.Pointers.push_back(LB.getPointer()); 7851 CombinedInfo.Sizes.push_back( 7852 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7853 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7854 : 1); 7855 7856 // If Mapper is valid, the last component inherits the mapper. 7857 bool HasMapper = Mapper && Next == CE; 7858 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7859 7860 // We need to add a pointer flag for each map that comes from the 7861 // same expression except for the first one. We also need to signal 7862 // this map is the first one that relates with the current capture 7863 // (there is a set of entries for each capture). 7864 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7865 MapType, MapModifiers, MotionModifiers, IsImplicit, 7866 !IsExpressionFirstInfo || RequiresReference || 7867 FirstPointerInComplexData, 7868 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7869 7870 if (!IsExpressionFirstInfo) { 7871 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7872 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7873 if (IsPointer) 7874 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7875 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7876 7877 if (ShouldBeMemberOf) { 7878 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7879 // should be later updated with the correct value of MEMBER_OF. 7880 Flags |= OMP_MAP_MEMBER_OF; 7881 // From now on, all subsequent PTR_AND_OBJ entries should not be 7882 // marked as MEMBER_OF. 
7883 ShouldBeMemberOf = false; 7884 } 7885 } 7886 7887 CombinedInfo.Types.push_back(Flags); 7888 } 7889 7890 // If we have encountered a member expression so far, keep track of the 7891 // mapped member. If the parent is "*this", then the value declaration 7892 // is nullptr. 7893 if (EncounteredME) { 7894 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7895 unsigned FieldIndex = FD->getFieldIndex(); 7896 7897 // Update info about the lowest and highest elements for this struct 7898 if (!PartialStruct.Base.isValid()) { 7899 PartialStruct.LowestElem = {FieldIndex, LB}; 7900 if (IsFinalArraySection) { 7901 Address HB = 7902 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7903 .getAddress(CGF); 7904 PartialStruct.HighestElem = {FieldIndex, HB}; 7905 } else { 7906 PartialStruct.HighestElem = {FieldIndex, LB}; 7907 } 7908 PartialStruct.Base = BP; 7909 PartialStruct.LB = BP; 7910 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7911 PartialStruct.LowestElem = {FieldIndex, LB}; 7912 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7913 PartialStruct.HighestElem = {FieldIndex, LB}; 7914 } 7915 } 7916 7917 // Need to emit combined struct for array sections. 7918 if (IsFinalArraySection || IsNonContiguous) 7919 PartialStruct.IsArraySection = true; 7920 7921 // If we have a final array section, we are done with this expression. 7922 if (IsFinalArraySection) 7923 break; 7924 7925 // The pointer becomes the base for the next element. 
7926 if (Next != CE) 7927 BP = LB; 7928 7929 IsExpressionFirstInfo = false; 7930 IsCaptureFirstInfo = false; 7931 FirstPointerInComplexData = false; 7932 } else if (FirstPointerInComplexData) { 7933 QualType Ty = Components.rbegin() 7934 ->getAssociatedDeclaration() 7935 ->getType() 7936 .getNonReferenceType(); 7937 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7938 FirstPointerInComplexData = false; 7939 } 7940 } 7941 // If ran into the whole component - allocate the space for the whole 7942 // record. 7943 if (!EncounteredME) 7944 PartialStruct.HasCompleteRecord = true; 7945 7946 if (!IsNonContiguous) 7947 return; 7948 7949 const ASTContext &Context = CGF.getContext(); 7950 7951 // For supporting stride in array section, we need to initialize the first 7952 // dimension size as 1, first offset as 0, and first count as 1 7953 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7954 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7955 MapValuesArrayTy CurStrides; 7956 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7957 uint64_t ElementTypeSize; 7958 7959 // Collect Size information for each dimension and get the element size as 7960 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 7961 // should be [10, 10] and the first stride is 4 btyes. 7962 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7963 Components) { 7964 const Expr *AssocExpr = Component.getAssociatedExpression(); 7965 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7966 7967 if (!OASE) 7968 continue; 7969 7970 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7971 auto *CAT = Context.getAsConstantArrayType(Ty); 7972 auto *VAT = Context.getAsVariableArrayType(Ty); 7973 7974 // We need all the dimension size except for the last dimension. 
7975 assert((VAT || CAT || &Component == &*Components.begin()) && 7976 "Should be either ConstantArray or VariableArray if not the " 7977 "first Component"); 7978 7979 // Get element size if CurStrides is empty. 7980 if (CurStrides.empty()) { 7981 const Type *ElementType = nullptr; 7982 if (CAT) 7983 ElementType = CAT->getElementType().getTypePtr(); 7984 else if (VAT) 7985 ElementType = VAT->getElementType().getTypePtr(); 7986 else 7987 assert(&Component == &*Components.begin() && 7988 "Only expect pointer (non CAT or VAT) when this is the " 7989 "first Component"); 7990 // If ElementType is null, then it means the base is a pointer 7991 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7992 // for next iteration. 7993 if (ElementType) { 7994 // For the case that having pointer as base, we need to remove one 7995 // level of indirection. 7996 if (&Component != &*Components.begin()) 7997 ElementType = ElementType->getPointeeOrArrayElementType(); 7998 ElementTypeSize = 7999 Context.getTypeSizeInChars(ElementType).getQuantity(); 8000 CurStrides.push_back( 8001 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8002 } 8003 } 8004 // Get dimension value except for the last dimension since we don't need 8005 // it. 8006 if (DimSizes.size() < Components.size() - 1) { 8007 if (CAT) 8008 DimSizes.push_back(llvm::ConstantInt::get( 8009 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8010 else if (VAT) 8011 DimSizes.push_back(CGF.Builder.CreateIntCast( 8012 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8013 /*IsSigned=*/false)); 8014 } 8015 } 8016 8017 // Skip the dummy dimension since we have already have its information. 8018 auto DI = DimSizes.begin() + 1; 8019 // Product of dimension. 8020 llvm::Value *DimProd = 8021 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8022 8023 // Collect info for non-contiguous. 
Notice that offset, count, and stride 8024 // are only meaningful for array-section, so we insert a null for anything 8025 // other than array-section. 8026 // Also, the size of offset, count, and stride are not the same as 8027 // pointers, base_pointers, sizes, or dims. Instead, the size of offset, 8028 // count, and stride are the same as the number of non-contiguous 8029 // declaration in target update to/from clause. 8030 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8031 Components) { 8032 const Expr *AssocExpr = Component.getAssociatedExpression(); 8033 8034 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8035 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8036 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8037 /*isSigned=*/false); 8038 CurOffsets.push_back(Offset); 8039 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8040 CurStrides.push_back(CurStrides.back()); 8041 continue; 8042 } 8043 8044 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8045 8046 if (!OASE) 8047 continue; 8048 8049 // Offset 8050 const Expr *OffsetExpr = OASE->getLowerBound(); 8051 llvm::Value *Offset = nullptr; 8052 if (!OffsetExpr) { 8053 // If offset is absent, then we just set it to zero. 8054 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8055 } else { 8056 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8057 CGF.Int64Ty, 8058 /*isSigned=*/false); 8059 } 8060 CurOffsets.push_back(Offset); 8061 8062 // Count 8063 const Expr *CountExpr = OASE->getLength(); 8064 llvm::Value *Count = nullptr; 8065 if (!CountExpr) { 8066 // In Clang, once a high dimension is an array section, we construct all 8067 // the lower dimension as array section, however, for case like 8068 // arr[0:2][2], Clang construct the inner dimension as an array section 8069 // but it actually is not in an array section form according to spec. 
8070 if (!OASE->getColonLocFirst().isValid() && 8071 !OASE->getColonLocSecond().isValid()) { 8072 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8073 } else { 8074 // OpenMP 5.0, 2.1.5 Array Sections, Description. 8075 // When the length is absent it defaults to ⌈(size − 8076 // lower-bound)/stride⌉, where size is the size of the array 8077 // dimension. 8078 const Expr *StrideExpr = OASE->getStride(); 8079 llvm::Value *Stride = 8080 StrideExpr 8081 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8082 CGF.Int64Ty, /*isSigned=*/false) 8083 : nullptr; 8084 if (Stride) 8085 Count = CGF.Builder.CreateUDiv( 8086 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8087 else 8088 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8089 } 8090 } else { 8091 Count = CGF.EmitScalarExpr(CountExpr); 8092 } 8093 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8094 CurCounts.push_back(Count); 8095 8096 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8097 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8098 // Offset Count Stride 8099 // D0 0 1 4 (int) <- dummy dimension 8100 // D1 0 2 8 (2 * (1) * 4) 8101 // D2 1 2 20 (1 * (1 * 5) * 4) 8102 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8103 const Expr *StrideExpr = OASE->getStride(); 8104 llvm::Value *Stride = 8105 StrideExpr 8106 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8107 CGF.Int64Ty, /*isSigned=*/false) 8108 : nullptr; 8109 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8110 if (Stride) 8111 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8112 else 8113 CurStrides.push_back(DimProd); 8114 if (DI != DimSizes.end()) 8115 ++DI; 8116 } 8117 8118 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8119 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8120 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8121 } 8122 8123 /// Return the adjusted map modifiers if the declaration a capture refers to 8124 /// appears in a first-private clause. This is expected to be used only with 8125 /// directives that start with 'target'. 8126 MappableExprsHandler::OpenMPOffloadMappingFlags 8127 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8128 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8129 8130 // A first private variable captured by reference will use only the 8131 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8132 // declaration is known as first-private in this handler. 8133 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8134 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8135 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8136 return MappableExprsHandler::OMP_MAP_ALWAYS | 8137 MappableExprsHandler::OMP_MAP_TO; 8138 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8139 return MappableExprsHandler::OMP_MAP_TO | 8140 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8141 return MappableExprsHandler::OMP_MAP_PRIVATE | 8142 MappableExprsHandler::OMP_MAP_TO; 8143 } 8144 return MappableExprsHandler::OMP_MAP_TO | 8145 MappableExprsHandler::OMP_MAP_FROM; 8146 } 8147 8148 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8149 // Rotate by getFlagMemberOffset() bits. 
8150 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8151 << getFlagMemberOffset()); 8152 } 8153 8154 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8155 OpenMPOffloadMappingFlags MemberOfFlag) { 8156 // If the entry is PTR_AND_OBJ but has not been marked with the special 8157 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8158 // marked as MEMBER_OF. 8159 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8160 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8161 return; 8162 8163 // Reset the placeholder value to prepare the flag for the assignment of the 8164 // proper MEMBER_OF value. 8165 Flags &= ~OMP_MAP_MEMBER_OF; 8166 Flags |= MemberOfFlag; 8167 } 8168 8169 void getPlainLayout(const CXXRecordDecl *RD, 8170 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8171 bool AsBase) const { 8172 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8173 8174 llvm::StructType *St = 8175 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8176 8177 unsigned NumElements = St->getNumElements(); 8178 llvm::SmallVector< 8179 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8180 RecordLayout(NumElements); 8181 8182 // Fill bases. 8183 for (const auto &I : RD->bases()) { 8184 if (I.isVirtual()) 8185 continue; 8186 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8187 // Ignore empty bases. 8188 if (Base->isEmpty() || CGF.getContext() 8189 .getASTRecordLayout(Base) 8190 .getNonVirtualSize() 8191 .isZero()) 8192 continue; 8193 8194 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8195 RecordLayout[FieldIndex] = Base; 8196 } 8197 // Fill in virtual bases. 8198 for (const auto &I : RD->vbases()) { 8199 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8200 // Ignore empty bases. 
8201 if (Base->isEmpty()) 8202 continue; 8203 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8204 if (RecordLayout[FieldIndex]) 8205 continue; 8206 RecordLayout[FieldIndex] = Base; 8207 } 8208 // Fill in all the fields. 8209 assert(!RD->isUnion() && "Unexpected union."); 8210 for (const auto *Field : RD->fields()) { 8211 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8212 // will fill in later.) 8213 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8214 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8215 RecordLayout[FieldIndex] = Field; 8216 } 8217 } 8218 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8219 &Data : RecordLayout) { 8220 if (Data.isNull()) 8221 continue; 8222 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8223 getPlainLayout(Base, Layout, /*AsBase=*/true); 8224 else 8225 Layout.push_back(Data.get<const FieldDecl *>()); 8226 } 8227 } 8228 8229 /// Generate all the base pointers, section pointers, sizes, map types, and 8230 /// mappers for the extracted mappable expressions (all included in \a 8231 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8232 /// pair of the relevant declaration and index where it occurs is appended to 8233 /// the device pointers info array. 8234 void generateAllInfoForClauses( 8235 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 8236 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8237 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8238 // We have to process the component lists that relate with the same 8239 // declaration in a single chunk so that we can generate the map flags 8240 // correctly. Therefore, we organize all lists in a map. 
enum MapKind { Present, Allocs, Other, Total };
  // MapKind selects the bucket a component list is stored in: lists carrying
  // the 'present' modifier, 'alloc' maps, and everything else. Total is only
  // used as the number of buckets. The buckets are later walked in
  // declaration order (Present, Allocs, Other).
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<SmallVector<MapInfo, 8>, 4>>
      Info;

  // Helper function to fill the information map for the different supported
  // clauses. Declarations found in SkipVarSet are ignored; for any other
  // declaration the per-kind buckets are created on first use and the new
  // MapInfo is appended to the bucket selected by Kind.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D, MapKind Kind,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
        if (SkipVarSet.contains(D))
          return;
        auto It = Info.find(D);
        if (It == Info.end())
          It = Info
                   .insert(std::make_pair(
                       D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                   .first;
        It->second[Kind].emplace_back(
            L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
            IsImplicit, Mapper, VarRef, ForDeviceAddr);
      };

  // Collect the component lists of all 'map' clauses.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPMapClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (!C->getMapTypeModifiers().empty() &&
        llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
          return K == OMPC_MAP_MODIFIER_present;
        }))
      Kind = Present;
    else if (C->getMapType() == OMPC_MAP_alloc)
      Kind = Allocs;
    // EI walks the clause's variable references in step with its component
    // lists.
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      // The variable reference is only forwarded when the clause has a valid
      // map location.
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              E);
      ++EI;
    }
  }
  // Collect the component lists of all 'to' clauses; they are recorded with
  // map type 'to' and carry motion modifiers instead of map modifiers.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPToClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (!C->getMotionModifiers().empty() &&
        llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
          return K == OMPC_MOTION_MODIFIER_present;
        }))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }
  // Collect the component lists of all 'from' clauses; they are recorded with
  // map type 'from'.
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPFromClause>(Cl);
    if (!C)
      continue;
    MapKind Kind = Other;
    if (!C->getMotionModifiers().empty() &&
        llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
          return K == OMPC_MOTION_MODIFIER_present;
        }))
      Kind = Present;
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  llvm::MapVector<CanonicalDeclPtr<const Decl>,
                  SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  // Entries for use_device_ptr items without map information are collected
  // separately and appended to CombinedInfo at the very end.
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            // PrevCI is the component that follows the base declaration in
            // the list (the list is walked from its back).
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
        }
        if (Found)
          continue;
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // The loaded pointer value is used both as base pointer and as
        // pointer of a zero-size RETURN_PARAM entry.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.Exprs.push_back(VD);
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *Cl : Clauses) {
    const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
    if (!C)
      continue;
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      // Each declaration is handled only once across all use_device_addr
      // clauses.
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != Data.end()) {
            CI->ReturnDevicePointer = true;
            Found = true;
            break;
          }
        }
        if (Found)
          continue;
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Unlike the use_device_ptr case above, the address of the item (or
        // its scalar value when it is not a glvalue) is used, and the entry is
        // written straight into CombinedInfo.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Emit the collected information one declaration at a time, in the order
  // the declarations were first inserted into Info.
  for (const auto &Data : Info) {
    StructRangeInfoTy PartialStruct;
    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    const Decl *D = Data.first;
    const ValueDecl *VD = cast_or_null<ValueDecl>(D);
    for (const auto &M : Data.second) {
      for (const MapInfo &L : M) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
            CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
            L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
      }
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(Data.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
          // placeholder value MEMBER_OF=FFFF so that the entry is later
          // updated with the correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }
    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already
    // have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

public:
/// Constructor for an executable directive. Records, from the directive's
/// clauses, the first-private declarations (explicit ones from 'firstprivate'
/// and implicit ones from 'uses_allocators') and the component lists of
/// 'is_device_ptr' items.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the allocator traits when they are a direct declaration
      // reference; otherwise fall back to the allocator expression itself if
      // it refers to a variable.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
8617 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, 8618 MapFlagsArrayTy &CurTypes, 8619 const StructRangeInfoTy &PartialStruct, 8620 const ValueDecl *VD = nullptr, 8621 bool NotTargetParams = true) const { 8622 if (CurTypes.size() == 1 && 8623 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && 8624 !PartialStruct.IsArraySection) 8625 return; 8626 Address LBAddr = PartialStruct.LowestElem.second; 8627 Address HBAddr = PartialStruct.HighestElem.second; 8628 if (PartialStruct.HasCompleteRecord) { 8629 LBAddr = PartialStruct.LB; 8630 HBAddr = PartialStruct.LB; 8631 } 8632 CombinedInfo.Exprs.push_back(VD); 8633 // Base is the base of the struct 8634 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); 8635 // Pointer is the address of the lowest element 8636 llvm::Value *LB = LBAddr.getPointer(); 8637 CombinedInfo.Pointers.push_back(LB); 8638 // There should not be a mapper for a combined entry. 8639 CombinedInfo.Mappers.push_back(nullptr); 8640 // Size is (addr of {highest+1} element) - (addr of lowest element) 8641 llvm::Value *HB = HBAddr.getPointer(); 8642 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8643 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8644 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8645 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8646 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8647 /*isSigned=*/false); 8648 CombinedInfo.Sizes.push_back(Size); 8649 // Map type is always TARGET_PARAM, if generate info for captures. 8650 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE 8651 : OMP_MAP_TARGET_PARAM); 8652 // If any element has the present modifier, then make sure the runtime 8653 // doesn't attempt to allocate the struct. 
8654 if (CurTypes.end() != 8655 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { 8656 return Type & OMP_MAP_PRESENT; 8657 })) 8658 CombinedInfo.Types.back() |= OMP_MAP_PRESENT; 8659 // Remove TARGET_PARAM flag from the first element 8660 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8661 8662 // All other current entries will be MEMBER_OF the combined entry 8663 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8664 // 0xFFFF in the MEMBER_OF field). 8665 OpenMPOffloadMappingFlags MemberOfFlag = 8666 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); 8667 for (auto &M : CurTypes) 8668 setCorrectMemberOfFlag(M, MemberOfFlag); 8669 } 8670 8671 /// Generate all the base pointers, section pointers, sizes, map types, and 8672 /// mappers for the extracted mappable expressions (all included in \a 8673 /// CombinedInfo). Also, for each item that relates with a device pointer, a 8674 /// pair of the relevant declaration and index where it occurs is appended to 8675 /// the device pointers info array. 8676 void generateAllInfo( 8677 MapCombinedInfoTy &CombinedInfo, 8678 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 8679 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 8680 assert(CurDir.is<const OMPExecutableDirective *>() && 8681 "Expect a executable directive"); 8682 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8683 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet); 8684 } 8685 8686 /// Generate all the base pointers, section pointers, sizes, map types, and 8687 /// mappers for the extracted map clauses of user-defined mapper (all included 8688 /// in \a CombinedInfo). 
8689 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { 8690 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8691 "Expect a declare mapper directive"); 8692 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8693 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo); 8694 } 8695 8696 /// Emit capture info for lambdas for variables captured by reference. 8697 void generateInfoForLambdaCaptures( 8698 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8699 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8700 const auto *RD = VD->getType() 8701 .getCanonicalType() 8702 .getNonReferenceType() 8703 ->getAsCXXRecordDecl(); 8704 if (!RD || !RD->isLambda()) 8705 return; 8706 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8707 LValue VDLVal = CGF.MakeAddrLValue( 8708 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8709 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8710 FieldDecl *ThisCapture = nullptr; 8711 RD->getCaptureFields(Captures, ThisCapture); 8712 if (ThisCapture) { 8713 LValue ThisLVal = 8714 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8715 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8716 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8717 VDLVal.getPointer(CGF)); 8718 CombinedInfo.Exprs.push_back(VD); 8719 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8720 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8721 CombinedInfo.Sizes.push_back( 8722 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8723 CGF.Int64Ty, /*isSigned=*/true)); 8724 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8725 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8726 CombinedInfo.Mappers.push_back(nullptr); 8727 } 8728 for (const LambdaCapture &LC : RD->captures()) { 8729 if (!LC.capturesVariable()) 8730 continue; 8731 const VarDecl *VD = 
LC.getCapturedVar(); 8732 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8733 continue; 8734 auto It = Captures.find(VD); 8735 assert(It != Captures.end() && "Found lambda capture without field."); 8736 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8737 if (LC.getCaptureKind() == LCK_ByRef) { 8738 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8739 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8740 VDLVal.getPointer(CGF)); 8741 CombinedInfo.Exprs.push_back(VD); 8742 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8743 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8744 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8745 CGF.getTypeSize( 8746 VD->getType().getCanonicalType().getNonReferenceType()), 8747 CGF.Int64Ty, /*isSigned=*/true)); 8748 } else { 8749 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8750 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8751 VDLVal.getPointer(CGF)); 8752 CombinedInfo.Exprs.push_back(VD); 8753 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8754 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8755 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8756 } 8757 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8758 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8759 CombinedInfo.Mappers.push_back(nullptr); 8760 } 8761 } 8762 8763 /// Set correct indices for lambdas captures. 8764 void adjustMemberOfForLambdaCaptures( 8765 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8766 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8767 MapFlagsArrayTy &Types) const { 8768 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8769 // Set correct member_of idx for all implicit lambda captures. 
8770 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8771 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8772 continue; 8773 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8774 assert(BasePtr && "Unable to find base lambda address."); 8775 int TgtIdx = -1; 8776 for (unsigned J = I; J > 0; --J) { 8777 unsigned Idx = J - 1; 8778 if (Pointers[Idx] != BasePtr) 8779 continue; 8780 TgtIdx = Idx; 8781 break; 8782 } 8783 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8784 // All other current entries will be MEMBER_OF the combined entry 8785 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8786 // 0xFFFF in the MEMBER_OF field). 8787 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8788 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8789 } 8790 } 8791 8792 /// Generate the base pointers, section pointers, sizes, map types, and 8793 /// mappers associated to a given capture (all included in \a CombinedInfo). 8794 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8795 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8796 StructRangeInfoTy &PartialStruct) const { 8797 assert(!Cap->capturesVariableArrayType() && 8798 "Not expecting to generate map info for a variable array type!"); 8799 8800 // We need to know when we generating information for the first component 8801 const ValueDecl *VD = Cap->capturesThis() 8802 ? nullptr 8803 : Cap->getCapturedVar()->getCanonicalDecl(); 8804 8805 // If this declaration appears in a is_device_ptr clause we just have to 8806 // pass the pointer by value. If it is a reference to a declaration, we just 8807 // pass its value. 
8808 if (DevPointersMap.count(VD)) { 8809 CombinedInfo.Exprs.push_back(VD); 8810 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8811 CombinedInfo.Pointers.push_back(Arg); 8812 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8813 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8814 /*isSigned=*/true)); 8815 CombinedInfo.Types.push_back( 8816 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 8817 OMP_MAP_TARGET_PARAM); 8818 CombinedInfo.Mappers.push_back(nullptr); 8819 return; 8820 } 8821 8822 using MapData = 8823 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8824 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8825 const ValueDecl *, const Expr *>; 8826 SmallVector<MapData, 4> DeclComponentLists; 8827 assert(CurDir.is<const OMPExecutableDirective *>() && 8828 "Expect a executable directive"); 8829 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8830 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8831 const auto *EI = C->getVarRefs().begin(); 8832 for (const auto L : C->decl_component_lists(VD)) { 8833 const ValueDecl *VDecl, *Mapper; 8834 // The Expression is not correct if the mapping is implicit 8835 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8836 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8837 std::tie(VDecl, Components, Mapper) = L; 8838 assert(VDecl == VD && "We got information for the wrong declaration??"); 8839 assert(!Components.empty() && 8840 "Not expecting declaration with no component lists."); 8841 DeclComponentLists.emplace_back(Components, C->getMapType(), 8842 C->getMapTypeModifiers(), 8843 C->isImplicit(), Mapper, E); 8844 ++EI; 8845 } 8846 } 8847 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, 8848 const MapData &RHS) { 8849 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); 8850 OpenMPMapClauseKind MapType = std::get<1>(RHS); 8851 bool HasPresent = !MapModifiers.empty() && 8852 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { 8853 return K == clang::OMPC_MAP_MODIFIER_present; 8854 }); 8855 bool HasAllocs = MapType == OMPC_MAP_alloc; 8856 MapModifiers = std::get<2>(RHS); 8857 MapType = std::get<1>(LHS); 8858 bool HasPresentR = 8859 !MapModifiers.empty() && 8860 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) { 8861 return K == clang::OMPC_MAP_MODIFIER_present; 8862 }); 8863 bool HasAllocsR = MapType == OMPC_MAP_alloc; 8864 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); 8865 }); 8866 8867 // Find overlapping elements (including the offset from the base element). 
8868 llvm::SmallDenseMap< 8869 const MapData *, 8870 llvm::SmallVector< 8871 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8872 4> 8873 OverlappedData; 8874 size_t Count = 0; 8875 for (const MapData &L : DeclComponentLists) { 8876 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8877 OpenMPMapClauseKind MapType; 8878 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8879 bool IsImplicit; 8880 const ValueDecl *Mapper; 8881 const Expr *VarRef; 8882 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8883 L; 8884 ++Count; 8885 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8886 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8887 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 8888 VarRef) = L1; 8889 auto CI = Components.rbegin(); 8890 auto CE = Components.rend(); 8891 auto SI = Components1.rbegin(); 8892 auto SE = Components1.rend(); 8893 for (; CI != CE && SI != SE; ++CI, ++SI) { 8894 if (CI->getAssociatedExpression()->getStmtClass() != 8895 SI->getAssociatedExpression()->getStmtClass()) 8896 break; 8897 // Are we dealing with different variables/fields? 8898 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8899 break; 8900 } 8901 // Found overlapping if, at least for one component, reached the head 8902 // of the components list. 8903 if (CI == CE || SI == SE) { 8904 // Ignore it if it is the same component. 8905 if (CI == CE && SI == SE) 8906 continue; 8907 const auto It = (SI == SE) ? CI : SI; 8908 // If one component is a pointer and another one is a kind of 8909 // dereference of this pointer (array subscript, section, dereference, 8910 // etc.), it is not an overlapping. 8911 if (!isa<MemberExpr>(It->getAssociatedExpression()) || 8912 std::prev(It) 8913 ->getAssociatedExpression() 8914 ->getType() 8915 .getNonReferenceType() 8916 ->isPointerType()) 8917 continue; 8918 const MapData &BaseData = CI == CE ? 
L : L1; 8919 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8920 SI == SE ? Components : Components1; 8921 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8922 OverlappedElements.getSecond().push_back(SubData); 8923 } 8924 } 8925 } 8926 // Sort the overlapped elements for each item. 8927 llvm::SmallVector<const FieldDecl *, 4> Layout; 8928 if (!OverlappedData.empty()) { 8929 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); 8930 const Type *OrigType = BaseType->getPointeeOrArrayElementType(); 8931 while (BaseType != OrigType) { 8932 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); 8933 OrigType = BaseType->getPointeeOrArrayElementType(); 8934 } 8935 8936 if (const auto *CRD = BaseType->getAsCXXRecordDecl()) 8937 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8938 else { 8939 const auto *RD = BaseType->getAsRecordDecl(); 8940 Layout.append(RD->field_begin(), RD->field_end()); 8941 } 8942 } 8943 for (auto &Pair : OverlappedData) { 8944 llvm::stable_sort( 8945 Pair.getSecond(), 8946 [&Layout]( 8947 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8948 OMPClauseMappableExprCommon::MappableExprComponentListRef 8949 Second) { 8950 auto CI = First.rbegin(); 8951 auto CE = First.rend(); 8952 auto SI = Second.rbegin(); 8953 auto SE = Second.rend(); 8954 for (; CI != CE && SI != SE; ++CI, ++SI) { 8955 if (CI->getAssociatedExpression()->getStmtClass() != 8956 SI->getAssociatedExpression()->getStmtClass()) 8957 break; 8958 // Are we dealing with different variables/fields? 8959 if (CI->getAssociatedDeclaration() != 8960 SI->getAssociatedDeclaration()) 8961 break; 8962 } 8963 8964 // Lists contain the same elements. 8965 if (CI == CE && SI == SE) 8966 return false; 8967 8968 // List with less elements is less than list with more elements. 
8969 if (CI == CE || SI == SE) 8970 return CI == CE; 8971 8972 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8973 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8974 if (FD1->getParent() == FD2->getParent()) 8975 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8976 const auto It = 8977 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8978 return FD == FD1 || FD == FD2; 8979 }); 8980 return *It == FD1; 8981 }); 8982 } 8983 8984 // Associated with a capture, because the mapping flags depend on it. 8985 // Go through all of the elements with the overlapped elements. 8986 bool IsFirstComponentList = true; 8987 for (const auto &Pair : OverlappedData) { 8988 const MapData &L = *Pair.getFirst(); 8989 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8990 OpenMPMapClauseKind MapType; 8991 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8992 bool IsImplicit; 8993 const ValueDecl *Mapper; 8994 const Expr *VarRef; 8995 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8996 L; 8997 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8998 OverlappedComponents = Pair.getSecond(); 8999 generateInfoForComponentList( 9000 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 9001 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 9002 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 9003 IsFirstComponentList = false; 9004 } 9005 // Go through other elements without overlapped elements. 
9006 for (const MapData &L : DeclComponentLists) { 9007 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 9008 OpenMPMapClauseKind MapType; 9009 ArrayRef<OpenMPMapModifierKind> MapModifiers; 9010 bool IsImplicit; 9011 const ValueDecl *Mapper; 9012 const Expr *VarRef; 9013 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 9014 L; 9015 auto It = OverlappedData.find(&L); 9016 if (It == OverlappedData.end()) 9017 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 9018 Components, CombinedInfo, PartialStruct, 9019 IsFirstComponentList, IsImplicit, Mapper, 9020 /*ForDeviceAddr=*/false, VD, VarRef); 9021 IsFirstComponentList = false; 9022 } 9023 } 9024 9025 /// Generate the default map information for a given capture \a CI, 9026 /// record field declaration \a RI and captured value \a CV. 9027 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9028 const FieldDecl &RI, llvm::Value *CV, 9029 MapCombinedInfoTy &CombinedInfo) const { 9030 bool IsImplicit = true; 9031 // Do the default mapping. 9032 if (CI.capturesThis()) { 9033 CombinedInfo.Exprs.push_back(nullptr); 9034 CombinedInfo.BasePointers.push_back(CV); 9035 CombinedInfo.Pointers.push_back(CV); 9036 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9037 CombinedInfo.Sizes.push_back( 9038 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9039 CGF.Int64Ty, /*isSigned=*/true)); 9040 // Default map type. 9041 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9042 } else if (CI.capturesVariableByCopy()) { 9043 const VarDecl *VD = CI.getCapturedVar(); 9044 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9045 CombinedInfo.BasePointers.push_back(CV); 9046 CombinedInfo.Pointers.push_back(CV); 9047 if (!RI.getType()->isAnyPointerType()) { 9048 // We have to signal to the runtime captures passed by value that are 9049 // not pointers. 
9050 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 9051 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9052 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9053 } else { 9054 // Pointers are implicitly mapped with a zero size and no flags 9055 // (other than first map that is added for all implicit maps). 9056 CombinedInfo.Types.push_back(OMP_MAP_NONE); 9057 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 9058 } 9059 auto I = FirstPrivateDecls.find(VD); 9060 if (I != FirstPrivateDecls.end()) 9061 IsImplicit = I->getSecond(); 9062 } else { 9063 assert(CI.capturesVariable() && "Expected captured reference."); 9064 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 9065 QualType ElementType = PtrTy->getPointeeType(); 9066 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9067 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 9068 // The default map type for a scalar/complex type is 'to' because by 9069 // default the value doesn't have to be retrieved. For an aggregate 9070 // type, the default is 'tofrom'. 9071 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 9072 const VarDecl *VD = CI.getCapturedVar(); 9073 auto I = FirstPrivateDecls.find(VD); 9074 if (I != FirstPrivateDecls.end() && 9075 VD->getType().isConstant(CGF.getContext())) { 9076 llvm::Constant *Addr = 9077 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 9078 // Copy the value of the original variable to the new global copy. 9079 CGF.Builder.CreateMemCpy( 9080 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 9081 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 9082 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 9083 // Use new global variable as the base pointers. 
9084 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9085 CombinedInfo.BasePointers.push_back(Addr); 9086 CombinedInfo.Pointers.push_back(Addr); 9087 } else { 9088 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9089 CombinedInfo.BasePointers.push_back(CV); 9090 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9091 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9092 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9093 AlignmentSource::Decl)); 9094 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 9095 } else { 9096 CombinedInfo.Pointers.push_back(CV); 9097 } 9098 } 9099 if (I != FirstPrivateDecls.end()) 9100 IsImplicit = I->getSecond(); 9101 } 9102 // Every default map produces a single argument which is a target parameter. 9103 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 9104 9105 // Add flag stating this is an implicit map. 9106 if (IsImplicit) 9107 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 9108 9109 // No user-defined mapper for default mapping. 9110 CombinedInfo.Mappers.push_back(nullptr); 9111 } 9112 }; 9113 } // anonymous namespace 9114 9115 static void emitNonContiguousDescriptor( 9116 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9117 CGOpenMPRuntime::TargetDataInfo &Info) { 9118 CodeGenModule &CGM = CGF.CGM; 9119 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 9120 &NonContigInfo = CombinedInfo.NonContigInfo; 9121 9122 // Build an array of struct descriptor_dim and then assign it to 9123 // offload_args. 
9124 // 9125 // struct descriptor_dim { 9126 // uint64_t offset; 9127 // uint64_t count; 9128 // uint64_t stride 9129 // }; 9130 ASTContext &C = CGF.getContext(); 9131 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9132 RecordDecl *RD; 9133 RD = C.buildImplicitRecord("descriptor_dim"); 9134 RD->startDefinition(); 9135 addFieldToRecordDecl(C, RD, Int64Ty); 9136 addFieldToRecordDecl(C, RD, Int64Ty); 9137 addFieldToRecordDecl(C, RD, Int64Ty); 9138 RD->completeDefinition(); 9139 QualType DimTy = C.getRecordType(RD); 9140 9141 enum { OffsetFD = 0, CountFD, StrideFD }; 9142 // We need two index variable here since the size of "Dims" is the same as the 9143 // size of Components, however, the size of offset, count, and stride is equal 9144 // to the size of base declaration that is non-contiguous. 9145 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9146 // Skip emitting ir if dimension size is 1 since it cannot be 9147 // non-contiguous. 9148 if (NonContigInfo.Dims[I] == 1) 9149 continue; 9150 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9151 QualType ArrayTy = 9152 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9153 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9154 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9155 unsigned RevIdx = EE - II - 1; 9156 LValue DimsLVal = CGF.MakeAddrLValue( 9157 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9158 // Offset 9159 LValue OffsetLVal = CGF.EmitLValueForField( 9160 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9161 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9162 // Count 9163 LValue CountLVal = CGF.EmitLValueForField( 9164 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9165 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9166 // Stride 9167 LValue StrideLVal = CGF.EmitLValueForField( 9168 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9169 
CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9170 } 9171 // args[I] = &dims 9172 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9173 DimsAddr, CGM.Int8PtrTy); 9174 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9175 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9176 Info.PointersArray, 0, I); 9177 Address PAddr(P, CGF.getPointerAlign()); 9178 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9179 ++L; 9180 } 9181 } 9182 9183 /// Emit a string constant containing the names of the values mapped to the 9184 /// offloading runtime library. 9185 llvm::Constant * 9186 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9187 MappableExprsHandler::MappingExprInfo &MapExprs) { 9188 llvm::Constant *SrcLocStr; 9189 if (!MapExprs.getMapDecl()) { 9190 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9191 } else { 9192 std::string ExprName = ""; 9193 if (MapExprs.getMapExpr()) { 9194 PrintingPolicy P(CGF.getContext().getLangOpts()); 9195 llvm::raw_string_ostream OS(ExprName); 9196 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9197 OS.flush(); 9198 } else { 9199 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9200 } 9201 9202 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9203 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9204 const char *FileName = PLoc.getFilename(); 9205 unsigned Line = PLoc.getLine(); 9206 unsigned Column = PLoc.getColumn(); 9207 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9208 Line, Column); 9209 } 9210 9211 return SrcLocStr; 9212 } 9213 9214 /// Emit the arrays used to pass the captures and map information to the 9215 /// offloading runtime library. If there is no map or capture information, 9216 /// return nullptr by reference. 
9217 static void emitOffloadingArrays( 9218 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9219 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9220 bool IsNonContiguous = false) { 9221 CodeGenModule &CGM = CGF.CGM; 9222 ASTContext &Ctx = CGF.getContext(); 9223 9224 // Reset the array information. 9225 Info.clearArrayInfo(); 9226 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9227 9228 if (Info.NumberOfPtrs) { 9229 // Detect if we have any capture size requiring runtime evaluation of the 9230 // size so that a constant array could be eventually used. 9231 bool hasRuntimeEvaluationCaptureSize = false; 9232 for (llvm::Value *S : CombinedInfo.Sizes) 9233 if (!isa<llvm::Constant>(S)) { 9234 hasRuntimeEvaluationCaptureSize = true; 9235 break; 9236 } 9237 9238 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9239 QualType PointerArrayType = Ctx.getConstantArrayType( 9240 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9241 /*IndexTypeQuals=*/0); 9242 9243 Info.BasePointersArray = 9244 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9245 Info.PointersArray = 9246 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9247 Address MappersArray = 9248 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9249 Info.MappersArray = MappersArray.getPointer(); 9250 9251 // If we don't have any VLA types or other types that require runtime 9252 // evaluation, we can use a constant array for the map sizes, otherwise we 9253 // need to fill up the arrays as we do for the pointers. 
9254 QualType Int64Ty = 9255 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9256 if (hasRuntimeEvaluationCaptureSize) { 9257 QualType SizeArrayType = Ctx.getConstantArrayType( 9258 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9259 /*IndexTypeQuals=*/0); 9260 Info.SizesArray = 9261 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9262 } else { 9263 // We expect all the sizes to be constant, so we collect them to create 9264 // a constant array. 9265 SmallVector<llvm::Constant *, 16> ConstSizes; 9266 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9267 if (IsNonContiguous && 9268 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9269 ConstSizes.push_back(llvm::ConstantInt::get( 9270 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9271 } else { 9272 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9273 } 9274 } 9275 9276 auto *SizesArrayInit = llvm::ConstantArray::get( 9277 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9278 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9279 auto *SizesArrayGbl = new llvm::GlobalVariable( 9280 CGM.getModule(), SizesArrayInit->getType(), 9281 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9282 SizesArrayInit, Name); 9283 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9284 Info.SizesArray = SizesArrayGbl; 9285 } 9286 9287 // The map types are always constant so we don't need to generate code to 9288 // fill arrays. Instead, we create an array constant. 
9289 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9290 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9291 llvm::Constant *MapTypesArrayInit = 9292 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9293 std::string MaptypesName = 9294 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9295 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9296 CGM.getModule(), MapTypesArrayInit->getType(), 9297 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9298 MapTypesArrayInit, MaptypesName); 9299 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9300 Info.MapTypesArray = MapTypesArrayGbl; 9301 9302 // The information types are only built if there is debug information 9303 // requested. 9304 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9305 Info.MapNamesArray = llvm::Constant::getNullValue( 9306 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9307 } else { 9308 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9309 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9310 }; 9311 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9312 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9313 9314 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 9315 llvm::ArrayType::get( 9316 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), 9317 CombinedInfo.Exprs.size()), 9318 InfoMap); 9319 auto *MapNamesArrayGbl = new llvm::GlobalVariable( 9320 CGM.getModule(), MapNamesArrayInit->getType(), 9321 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9322 MapNamesArrayInit, 9323 CGM.getOpenMPRuntime().getName({"offload_mapnames"})); 9324 Info.MapNamesArray = MapNamesArrayGbl; 9325 } 9326 9327 // If there's a present map type modifier, it must not be applied to the end 9328 // of a region, so generate a separate map type array in that case. 
9329 if (Info.separateBeginEndCalls()) { 9330 bool EndMapTypesDiffer = false; 9331 for (uint64_t &Type : Mapping) { 9332 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9333 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9334 EndMapTypesDiffer = true; 9335 } 9336 } 9337 if (EndMapTypesDiffer) { 9338 MapTypesArrayInit = 9339 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9340 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9341 MapTypesArrayGbl = new llvm::GlobalVariable( 9342 CGM.getModule(), MapTypesArrayInit->getType(), 9343 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9344 MapTypesArrayInit, MaptypesName); 9345 MapTypesArrayGbl->setUnnamedAddr( 9346 llvm::GlobalValue::UnnamedAddr::Global); 9347 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9348 } 9349 } 9350 9351 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9352 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9353 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9354 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9355 Info.BasePointersArray, 0, I); 9356 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9357 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9358 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9359 CGF.Builder.CreateStore(BPVal, BPAddr); 9360 9361 if (Info.requiresDevicePointerInfo()) 9362 if (const ValueDecl *DevVD = 9363 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9364 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9365 9366 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9367 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9368 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9369 Info.PointersArray, 0, I); 9370 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9371 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9372 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9373 CGF.Builder.CreateStore(PVal, PAddr); 9374 9375 if (hasRuntimeEvaluationCaptureSize) { 
9376 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9377 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9378 Info.SizesArray, 9379 /*Idx0=*/0, 9380 /*Idx1=*/I); 9381 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9382 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9383 CGM.Int64Ty, 9384 /*isSigned=*/true), 9385 SAddr); 9386 } 9387 9388 // Fill up the mapper array. 9389 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9390 if (CombinedInfo.Mappers[I]) { 9391 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9392 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9393 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9394 Info.HasMapper = true; 9395 } 9396 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9397 CGF.Builder.CreateStore(MFunc, MAddr); 9398 } 9399 } 9400 9401 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9402 Info.NumberOfPtrs == 0) 9403 return; 9404 9405 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9406 } 9407 9408 namespace { 9409 /// Additional arguments for emitOffloadingArraysArgument function. 9410 struct ArgumentsOptions { 9411 bool ForEndCall = false; 9412 ArgumentsOptions() = default; 9413 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9414 }; 9415 } // namespace 9416 9417 /// Emit the arguments to be passed to the runtime library based on the 9418 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9419 /// ForEndCall, emit map types to be passed for the end of the region instead of 9420 /// the beginning. 
9421 static void emitOffloadingArraysArgument( 9422 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9423 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9424 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9425 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9426 const ArgumentsOptions &Options = ArgumentsOptions()) { 9427 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9428 "expected region end call to runtime only when end call is separate"); 9429 CodeGenModule &CGM = CGF.CGM; 9430 if (Info.NumberOfPtrs) { 9431 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9432 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9433 Info.BasePointersArray, 9434 /*Idx0=*/0, /*Idx1=*/0); 9435 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9436 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9437 Info.PointersArray, 9438 /*Idx0=*/0, 9439 /*Idx1=*/0); 9440 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9441 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9442 /*Idx0=*/0, /*Idx1=*/0); 9443 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9444 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9445 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9446 : Info.MapTypesArray, 9447 /*Idx0=*/0, 9448 /*Idx1=*/0); 9449 9450 // Only emit the mapper information arrays if debug information is 9451 // requested. 
9452 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9453 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9454 else 9455 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9456 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9457 Info.MapNamesArray, 9458 /*Idx0=*/0, 9459 /*Idx1=*/0); 9460 // If there is no user-defined mapper, set the mapper array to nullptr to 9461 // avoid an unnecessary data privatization 9462 if (!Info.HasMapper) 9463 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9464 else 9465 MappersArrayArg = 9466 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9467 } else { 9468 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9469 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9470 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9471 MapTypesArrayArg = 9472 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9473 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9474 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9475 } 9476 } 9477 9478 /// Check for inner distribute directive. 
9479 static const OMPExecutableDirective * 9480 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9481 const auto *CS = D.getInnermostCapturedStmt(); 9482 const auto *Body = 9483 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9484 const Stmt *ChildStmt = 9485 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9486 9487 if (const auto *NestedDir = 9488 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9489 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9490 switch (D.getDirectiveKind()) { 9491 case OMPD_target: 9492 if (isOpenMPDistributeDirective(DKind)) 9493 return NestedDir; 9494 if (DKind == OMPD_teams) { 9495 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9496 /*IgnoreCaptured=*/true); 9497 if (!Body) 9498 return nullptr; 9499 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9500 if (const auto *NND = 9501 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9502 DKind = NND->getDirectiveKind(); 9503 if (isOpenMPDistributeDirective(DKind)) 9504 return NND; 9505 } 9506 } 9507 return nullptr; 9508 case OMPD_target_teams: 9509 if (isOpenMPDistributeDirective(DKind)) 9510 return NestedDir; 9511 return nullptr; 9512 case OMPD_target_parallel: 9513 case OMPD_target_simd: 9514 case OMPD_target_parallel_for: 9515 case OMPD_target_parallel_for_simd: 9516 return nullptr; 9517 case OMPD_target_teams_distribute: 9518 case OMPD_target_teams_distribute_simd: 9519 case OMPD_target_teams_distribute_parallel_for: 9520 case OMPD_target_teams_distribute_parallel_for_simd: 9521 case OMPD_parallel: 9522 case OMPD_for: 9523 case OMPD_parallel_for: 9524 case OMPD_parallel_master: 9525 case OMPD_parallel_sections: 9526 case OMPD_for_simd: 9527 case OMPD_parallel_for_simd: 9528 case OMPD_cancel: 9529 case OMPD_cancellation_point: 9530 case OMPD_ordered: 9531 case OMPD_threadprivate: 9532 case OMPD_allocate: 9533 case OMPD_task: 9534 case OMPD_simd: 9535 case OMPD_tile: 9536 
case OMPD_sections: 9537 case OMPD_section: 9538 case OMPD_single: 9539 case OMPD_master: 9540 case OMPD_critical: 9541 case OMPD_taskyield: 9542 case OMPD_barrier: 9543 case OMPD_taskwait: 9544 case OMPD_taskgroup: 9545 case OMPD_atomic: 9546 case OMPD_flush: 9547 case OMPD_depobj: 9548 case OMPD_scan: 9549 case OMPD_teams: 9550 case OMPD_target_data: 9551 case OMPD_target_exit_data: 9552 case OMPD_target_enter_data: 9553 case OMPD_distribute: 9554 case OMPD_distribute_simd: 9555 case OMPD_distribute_parallel_for: 9556 case OMPD_distribute_parallel_for_simd: 9557 case OMPD_teams_distribute: 9558 case OMPD_teams_distribute_simd: 9559 case OMPD_teams_distribute_parallel_for: 9560 case OMPD_teams_distribute_parallel_for_simd: 9561 case OMPD_target_update: 9562 case OMPD_declare_simd: 9563 case OMPD_declare_variant: 9564 case OMPD_begin_declare_variant: 9565 case OMPD_end_declare_variant: 9566 case OMPD_declare_target: 9567 case OMPD_end_declare_target: 9568 case OMPD_declare_reduction: 9569 case OMPD_declare_mapper: 9570 case OMPD_taskloop: 9571 case OMPD_taskloop_simd: 9572 case OMPD_master_taskloop: 9573 case OMPD_master_taskloop_simd: 9574 case OMPD_parallel_master_taskloop: 9575 case OMPD_parallel_master_taskloop_simd: 9576 case OMPD_requires: 9577 case OMPD_unknown: 9578 default: 9579 llvm_unreachable("Unexpected directive."); 9580 } 9581 } 9582 9583 return nullptr; 9584 } 9585 9586 /// Emit the user-defined mapper function. The code generation follows the 9587 /// pattern in the example below. 9588 /// \code 9589 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9590 /// void *base, void *begin, 9591 /// int64_t size, int64_t type, 9592 /// void *name = nullptr) { 9593 /// // Allocate space for an array section first or add a base/begin for 9594 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// Nothing is emitted when a mapper function for \p D is already recorded in
/// UDMMap. Otherwise the new function is recorded in UDMMap and, if \p CGF is
/// non-null, \p D is also appended to the FunctionUDMMap entry of CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The six parameters are, in order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the declare mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block holding the branch above; it is the first predecessor
  // of the loop body PHI created below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that will hold the loop back-edge; it is updated
  // to the last "omp.type.end" block created by the component loop below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count left by getFlagMemberOffset() so that it can be
  // added to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is a null pointer when no debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four ways of reaching "omp.type.end". The edge coming straight
    // from "omp.type.to.else" is the tofrom case and keeps MemberMapType as is.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array shadows the one-element OffloadingArgs declared before
    // the loop; inside the loop body only the six-element version is visible.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // When emitted on behalf of a function, remember the mapper under that
  // function as well.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param Size number of array elements (it is multiplied by \p ElementSize
/// to obtain the byte size passed to the runtime).
/// \param ExitBB block that is branched to when the runtime call is not
/// required.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    // NOTE: despite its name, BaseIsBegin is true when Base and Begin differ
    // (their pointer difference is non-zero).
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when the delete bit is clear.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO, OMP_MAP_FROM and OMP_MAP_MEMBER_OF from the map type, so
  // that it achieves memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM |
                                   MappableExprsHandler::OMP_MAP_MEMBER_OF)));
  // No name is passed for the array section itself.
  llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapNameArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded in UDMMap for \p D. If there is no
/// entry yet, the mapper is emitted first via emitUserDefinedMapper and the
/// result of looking it up afterwards is returned.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount_mapper that passes the number of
/// iterations computed by \p SizeEmitter for the loop directive associated with
/// the target directive \p D.
///
/// The loop directive is \p D itself when its kind is both a distribute and a
/// teams directive; otherwise it is the nested distribute directive found by
/// getNestedDistributeDirective. Nothing is emitted if no such directive exists
/// or if \p SizeEmitter returns null.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of \p D.
///
/// When \p OutlinedFnID is non-null, the offloading arrays are filled and one
/// of the __tgt_target*_mapper runtime entry points is called; if that call
/// returns a non-zero value, \p OutlinedFn is called on the host instead. With
/// an \p IfCond the offloading path is only taken when the condition holds,
/// otherwise \p OutlinedFn is called directly. When \p OutlinedFnID is null,
/// only the direct host call is emitted.
///
/// \param OutlinedFn host version of the target region; must not be null.
/// \param OutlinedFnID value passed to the runtime to identify the region.
/// \param IfCond expression of the 'if' clause, or null.
/// \param Device expression of the 'device' clause (may be null) together with
/// its modifier.
/// \param SizeEmitter callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' or 'nowait' clause routes the region bodies below through
  // EmitOMPTargetTaskBasedDirective instead of emitInlinedDirective.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Shared between TargetThenGen (which fills them in) and ThenGen (which
  // reads them); both lambdas capture these by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // Same argument list as above, without the team and thread counts.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value selects the host fallback.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information for every capture, build the
  // offloading arrays, publish them through InputInfo / MapTypesArray /
  // MapNamesArray and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A capture of 'this' is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Hand the arrays over to ThenGen through the by-reference captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, through a task-based directive if required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
10297 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10298 ParentName, Line)) 10299 return; 10300 10301 switch (E.getDirectiveKind()) { 10302 case OMPD_target: 10303 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10304 cast<OMPTargetDirective>(E)); 10305 break; 10306 case OMPD_target_parallel: 10307 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10308 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10309 break; 10310 case OMPD_target_teams: 10311 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10312 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10313 break; 10314 case OMPD_target_teams_distribute: 10315 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10316 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10317 break; 10318 case OMPD_target_teams_distribute_simd: 10319 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10320 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10321 break; 10322 case OMPD_target_parallel_for: 10323 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10324 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10325 break; 10326 case OMPD_target_parallel_for_simd: 10327 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10328 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10329 break; 10330 case OMPD_target_simd: 10331 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10332 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10333 break; 10334 case OMPD_target_teams_distribute_parallel_for: 10335 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10336 CGM, ParentName, 10337 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10338 break; 10339 case OMPD_target_teams_distribute_parallel_for_simd: 10340 CodeGenFunction:: 10341 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10342 CGM, ParentName, 10343 
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10344 break; 10345 case OMPD_parallel: 10346 case OMPD_for: 10347 case OMPD_parallel_for: 10348 case OMPD_parallel_master: 10349 case OMPD_parallel_sections: 10350 case OMPD_for_simd: 10351 case OMPD_parallel_for_simd: 10352 case OMPD_cancel: 10353 case OMPD_cancellation_point: 10354 case OMPD_ordered: 10355 case OMPD_threadprivate: 10356 case OMPD_allocate: 10357 case OMPD_task: 10358 case OMPD_simd: 10359 case OMPD_tile: 10360 case OMPD_sections: 10361 case OMPD_section: 10362 case OMPD_single: 10363 case OMPD_master: 10364 case OMPD_critical: 10365 case OMPD_taskyield: 10366 case OMPD_barrier: 10367 case OMPD_taskwait: 10368 case OMPD_taskgroup: 10369 case OMPD_atomic: 10370 case OMPD_flush: 10371 case OMPD_depobj: 10372 case OMPD_scan: 10373 case OMPD_teams: 10374 case OMPD_target_data: 10375 case OMPD_target_exit_data: 10376 case OMPD_target_enter_data: 10377 case OMPD_distribute: 10378 case OMPD_distribute_simd: 10379 case OMPD_distribute_parallel_for: 10380 case OMPD_distribute_parallel_for_simd: 10381 case OMPD_teams_distribute: 10382 case OMPD_teams_distribute_simd: 10383 case OMPD_teams_distribute_parallel_for: 10384 case OMPD_teams_distribute_parallel_for_simd: 10385 case OMPD_target_update: 10386 case OMPD_declare_simd: 10387 case OMPD_declare_variant: 10388 case OMPD_begin_declare_variant: 10389 case OMPD_end_declare_variant: 10390 case OMPD_declare_target: 10391 case OMPD_end_declare_target: 10392 case OMPD_declare_reduction: 10393 case OMPD_declare_mapper: 10394 case OMPD_taskloop: 10395 case OMPD_taskloop_simd: 10396 case OMPD_master_taskloop: 10397 case OMPD_master_taskloop_simd: 10398 case OMPD_parallel_master_taskloop: 10399 case OMPD_parallel_master_taskloop_simd: 10400 case OMPD_requires: 10401 case OMPD_unknown: 10402 default: 10403 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10404 } 10405 return; 10406 } 10407 10408 if (const auto *E = 
dyn_cast<OMPExecutableDirective>(S)) { 10409 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10410 return; 10411 10412 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); 10413 return; 10414 } 10415 10416 // If this is a lambda function, look into its body. 10417 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10418 S = L->getBody(); 10419 10420 // Keep looking for target regions recursively. 10421 for (const Stmt *II : S->children()) 10422 scanForTargetRegionsFunctions(II, ParentName); 10423 } 10424 10425 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10426 // If emitting code for the host, we do not process FD here. Instead we do 10427 // the normal code generation. 10428 if (!CGM.getLangOpts().OpenMPIsDevice) { 10429 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10430 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10431 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10432 // Do not emit device_type(nohost) functions for the host. 10433 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10434 return true; 10435 } 10436 return false; 10437 } 10438 10439 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10440 // Try to detect target regions in the function. 10441 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10442 StringRef Name = CGM.getMangledName(GD); 10443 scanForTargetRegionsFunctions(FD->getBody(), Name); 10444 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10445 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10446 // Do not emit device_type(nohost) functions for the host. 10447 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10448 return true; 10449 } 10450 10451 // Do not to emit function if it is not marked as declare target. 
10452 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10453 AlreadyEmittedTargetDecls.count(VD) == 0; 10454 } 10455 10456 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10457 if (!CGM.getLangOpts().OpenMPIsDevice) 10458 return false; 10459 10460 // Check if there are Ctors/Dtors in this declaration and look for target 10461 // regions in it. We use the complete variant to produce the kernel name 10462 // mangling. 10463 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10464 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10465 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10466 StringRef ParentName = 10467 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10468 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10469 } 10470 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10471 StringRef ParentName = 10472 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10473 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10474 } 10475 } 10476 10477 // Do not to emit variable if it is not marked as declare target. 
10478 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10479 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10480 cast<VarDecl>(GD.getDecl())); 10481 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10482 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10483 HasRequiresUnifiedSharedMemory)) { 10484 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10485 return true; 10486 } 10487 return false; 10488 } 10489 10490 llvm::Constant * 10491 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10492 const VarDecl *VD) { 10493 assert(VD->getType().isConstant(CGM.getContext()) && 10494 "Expected constant variable."); 10495 StringRef VarName; 10496 llvm::Constant *Addr; 10497 llvm::GlobalValue::LinkageTypes Linkage; 10498 QualType Ty = VD->getType(); 10499 SmallString<128> Buffer; 10500 { 10501 unsigned DeviceID; 10502 unsigned FileID; 10503 unsigned Line; 10504 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10505 FileID, Line); 10506 llvm::raw_svector_ostream OS(Buffer); 10507 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10508 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10509 VarName = OS.str(); 10510 } 10511 Linkage = llvm::GlobalValue::InternalLinkage; 10512 Addr = 10513 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10514 getDefaultFirstprivateAddressSpace()); 10515 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10516 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10517 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10518 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10519 VarName, Addr, VarSize, 10520 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10521 return Addr; 10522 } 10523 10524 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10525 llvm::Constant *Addr) { 10526 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10527 
!CGM.getLangOpts().OpenMPIsDevice) 10528 return; 10529 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10530 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10531 if (!Res) { 10532 if (CGM.getLangOpts().OpenMPIsDevice) { 10533 // Register non-target variables being emitted in device code (debug info 10534 // may cause this). 10535 StringRef VarName = CGM.getMangledName(VD); 10536 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10537 } 10538 return; 10539 } 10540 // Register declare target variables. 10541 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10542 StringRef VarName; 10543 CharUnits VarSize; 10544 llvm::GlobalValue::LinkageTypes Linkage; 10545 10546 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10547 !HasRequiresUnifiedSharedMemory) { 10548 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10549 VarName = CGM.getMangledName(VD); 10550 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10551 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10552 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10553 } else { 10554 VarSize = CharUnits::Zero(); 10555 } 10556 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10557 // Temp solution to prevent optimizations of the internal variables. 
10558 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10559 std::string RefName = getName({VarName, "ref"}); 10560 if (!CGM.GetGlobalValue(RefName)) { 10561 llvm::Constant *AddrRef = 10562 getOrCreateInternalVariable(Addr->getType(), RefName); 10563 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10564 GVAddrRef->setConstant(/*Val=*/true); 10565 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10566 GVAddrRef->setInitializer(Addr); 10567 CGM.addCompilerUsedGlobal(GVAddrRef); 10568 } 10569 } 10570 } else { 10571 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10572 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10573 HasRequiresUnifiedSharedMemory)) && 10574 "Declare target attribute must link or to with unified memory."); 10575 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10576 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10577 else 10578 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10579 10580 if (CGM.getLangOpts().OpenMPIsDevice) { 10581 VarName = Addr->getName(); 10582 Addr = nullptr; 10583 } else { 10584 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10585 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10586 } 10587 VarSize = CGM.getPointerSize(); 10588 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10589 } 10590 10591 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10592 VarName, Addr, VarSize, Flags, Linkage); 10593 } 10594 10595 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10596 if (isa<FunctionDecl>(GD.getDecl()) || 10597 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10598 return emitTargetFunctions(GD); 10599 10600 return emitTargetGlobalVariable(GD); 10601 } 10602 10603 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10604 for (const VarDecl *VD : DeferredGlobalVariables) { 10605 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10606 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10607 if (!Res) 
10608 continue; 10609 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10610 !HasRequiresUnifiedSharedMemory) { 10611 CGM.EmitGlobal(VD); 10612 } else { 10613 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10614 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10615 HasRequiresUnifiedSharedMemory)) && 10616 "Expected link clause or to clause with unified memory."); 10617 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10618 } 10619 } 10620 } 10621 10622 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10623 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10624 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10625 " Expected target-based directive."); 10626 } 10627 10628 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10629 for (const OMPClause *Clause : D->clauselists()) { 10630 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10631 HasRequiresUnifiedSharedMemory = true; 10632 } else if (const auto *AC = 10633 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10634 switch (AC->getAtomicDefaultMemOrderKind()) { 10635 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10636 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10637 break; 10638 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10639 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10640 break; 10641 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10642 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10643 break; 10644 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10645 break; 10646 } 10647 } 10648 } 10649 } 10650 10651 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10652 return RequiresAtomicOrdering; 10653 } 10654 10655 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10656 LangAS &AS) { 10657 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10658 return false; 10659 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10660 
switch(A->getAllocatorType()) { 10661 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10662 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10663 // Not supported, fallback to the default mem space. 10664 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10665 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10666 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10667 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10668 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10669 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10670 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10671 AS = LangAS::Default; 10672 return true; 10673 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10674 llvm_unreachable("Expected predefined allocator for the variables with the " 10675 "static storage."); 10676 } 10677 return false; 10678 } 10679 10680 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10681 return HasRequiresUnifiedSharedMemory; 10682 } 10683 10684 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10685 CodeGenModule &CGM) 10686 : CGM(CGM) { 10687 if (CGM.getLangOpts().OpenMPIsDevice) { 10688 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10689 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10690 } 10691 } 10692 10693 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10694 if (CGM.getLangOpts().OpenMPIsDevice) 10695 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10696 } 10697 10698 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10699 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10700 return true; 10701 10702 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10703 // Do not to emit function if it is marked as declare target as it was already 10704 // emitted. 
10705 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10706 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10707 if (auto *F = dyn_cast_or_null<llvm::Function>( 10708 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10709 return !F->isDeclaration(); 10710 return false; 10711 } 10712 return true; 10713 } 10714 10715 return !AlreadyEmittedTargetDecls.insert(D).second; 10716 } 10717 10718 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10719 // If we don't have entries or if we are emitting code for the device, we 10720 // don't need to do anything. 10721 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10722 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10723 (OffloadEntriesInfoManager.empty() && 10724 !HasEmittedDeclareTargetRegion && 10725 !HasEmittedTargetRegion)) 10726 return nullptr; 10727 10728 // Create and register the function that handles the requires directives. 10729 ASTContext &C = CGM.getContext(); 10730 10731 llvm::Function *RequiresRegFn; 10732 { 10733 CodeGenFunction CGF(CGM); 10734 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10735 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10736 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10737 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10738 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10739 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10740 // TODO: check for other requires clauses. 10741 // The requires directive takes effect only when a target region is 10742 // present in the compilation unit. Otherwise it is ignored and not 10743 // passed to the runtime. This avoids the runtime from throwing an error 10744 // for mismatching requires clauses across compilation units that don't 10745 // contain at least 1 target region. 
10746 assert((HasEmittedTargetRegion || 10747 HasEmittedDeclareTargetRegion || 10748 !OffloadEntriesInfoManager.empty()) && 10749 "Target or declare target region expected."); 10750 if (HasRequiresUnifiedSharedMemory) 10751 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10753 CGM.getModule(), OMPRTL___tgt_register_requires), 10754 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10755 CGF.FinishFunction(); 10756 } 10757 return RequiresRegFn; 10758 } 10759 10760 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10761 const OMPExecutableDirective &D, 10762 SourceLocation Loc, 10763 llvm::Function *OutlinedFn, 10764 ArrayRef<llvm::Value *> CapturedVars) { 10765 if (!CGF.HaveInsertPoint()) 10766 return; 10767 10768 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10769 CodeGenFunction::RunCleanupsScope Scope(CGF); 10770 10771 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10772 llvm::Value *Args[] = { 10773 RTLoc, 10774 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10775 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10776 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10777 RealArgs.append(std::begin(Args), std::end(Args)); 10778 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10779 10780 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10781 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10782 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10783 } 10784 10785 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10786 const Expr *NumTeams, 10787 const Expr *ThreadLimit, 10788 SourceLocation Loc) { 10789 if (!CGF.HaveInsertPoint()) 10790 return; 10791 10792 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10793 10794 llvm::Value *NumTeamsVal = 10795 NumTeams 10796 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10797 CGF.CGM.Int32Ty, /* isSigned = */ true) 10798 : CGF.Builder.getInt32(0); 10799 10800 llvm::Value *ThreadLimitVal = 10801 ThreadLimit 10802 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10803 CGF.CGM.Int32Ty, /* isSigned = */ true) 10804 : CGF.Builder.getInt32(0); 10805 10806 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10807 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10808 ThreadLimitVal}; 10809 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10810 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10811 PushNumTeamsArgs); 10812 } 10813 10814 void CGOpenMPRuntime::emitTargetDataCalls( 10815 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10816 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10817 if (!CGF.HaveInsertPoint()) 10818 return; 10819 10820 // Action used to replace the default codegen action and turn privatization 10821 // off. 10822 PrePostActionTy NoPrivAction; 10823 10824 // Generate the code for the opening of the data environment. Capture all the 10825 // arguments of the runtime call by reference because they are used in the 10826 // closing of the region. 10827 auto &&BeginThenGen = [this, &D, Device, &Info, 10828 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10829 // Fill up the arrays with all the mapped variables. 10830 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10831 10832 // Get map clause information. 10833 MappableExprsHandler MEHandler(D, CGF); 10834 MEHandler.generateAllInfo(CombinedInfo); 10835 10836 // Fill up the arrays and create the arguments. 
10837 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10838 /*IsNonContiguous=*/true); 10839 10840 llvm::Value *BasePointersArrayArg = nullptr; 10841 llvm::Value *PointersArrayArg = nullptr; 10842 llvm::Value *SizesArrayArg = nullptr; 10843 llvm::Value *MapTypesArrayArg = nullptr; 10844 llvm::Value *MapNamesArrayArg = nullptr; 10845 llvm::Value *MappersArrayArg = nullptr; 10846 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10847 SizesArrayArg, MapTypesArrayArg, 10848 MapNamesArrayArg, MappersArrayArg, Info); 10849 10850 // Emit device ID if any. 10851 llvm::Value *DeviceID = nullptr; 10852 if (Device) { 10853 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10854 CGF.Int64Ty, /*isSigned=*/true); 10855 } else { 10856 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10857 } 10858 10859 // Emit the number of elements in the offloading arrays. 10860 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10861 // 10862 // Source location for the ident struct 10863 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10864 10865 llvm::Value *OffloadingArgs[] = {RTLoc, 10866 DeviceID, 10867 PointerNum, 10868 BasePointersArrayArg, 10869 PointersArrayArg, 10870 SizesArrayArg, 10871 MapTypesArrayArg, 10872 MapNamesArrayArg, 10873 MappersArrayArg}; 10874 CGF.EmitRuntimeCall( 10875 OMPBuilder.getOrCreateRuntimeFunction( 10876 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10877 OffloadingArgs); 10878 10879 // If device pointer privatization is required, emit the body of the region 10880 // here. It will have to be duplicated: with and without privatization. 10881 if (!Info.CaptureDeviceAddrMap.empty()) 10882 CodeGen(CGF); 10883 }; 10884 10885 // Generate code for the closing of the data region. 
10886 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10887 PrePostActionTy &) { 10888 assert(Info.isValid() && "Invalid data environment closing arguments."); 10889 10890 llvm::Value *BasePointersArrayArg = nullptr; 10891 llvm::Value *PointersArrayArg = nullptr; 10892 llvm::Value *SizesArrayArg = nullptr; 10893 llvm::Value *MapTypesArrayArg = nullptr; 10894 llvm::Value *MapNamesArrayArg = nullptr; 10895 llvm::Value *MappersArrayArg = nullptr; 10896 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10897 SizesArrayArg, MapTypesArrayArg, 10898 MapNamesArrayArg, MappersArrayArg, Info, 10899 {/*ForEndCall=*/true}); 10900 10901 // Emit device ID if any. 10902 llvm::Value *DeviceID = nullptr; 10903 if (Device) { 10904 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10905 CGF.Int64Ty, /*isSigned=*/true); 10906 } else { 10907 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10908 } 10909 10910 // Emit the number of elements in the offloading arrays. 10911 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10912 10913 // Source location for the ident struct 10914 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10915 10916 llvm::Value *OffloadingArgs[] = {RTLoc, 10917 DeviceID, 10918 PointerNum, 10919 BasePointersArrayArg, 10920 PointersArrayArg, 10921 SizesArrayArg, 10922 MapTypesArrayArg, 10923 MapNamesArrayArg, 10924 MappersArrayArg}; 10925 CGF.EmitRuntimeCall( 10926 OMPBuilder.getOrCreateRuntimeFunction( 10927 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10928 OffloadingArgs); 10929 }; 10930 10931 // If we need device pointer privatization, we need to emit the body of the 10932 // region with no privatization in the 'else' branch of the conditional. 10933 // Otherwise, we don't have to do anything. 
10934 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10935 PrePostActionTy &) { 10936 if (!Info.CaptureDeviceAddrMap.empty()) { 10937 CodeGen.setAction(NoPrivAction); 10938 CodeGen(CGF); 10939 } 10940 }; 10941 10942 // We don't have to do anything to close the region if the if clause evaluates 10943 // to false. 10944 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10945 10946 if (IfCond) { 10947 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10948 } else { 10949 RegionCodeGenTy RCG(BeginThenGen); 10950 RCG(CGF); 10951 } 10952 10953 // If we don't require privatization of device pointers, we emit the body in 10954 // between the runtime calls. This avoids duplicating the body code. 10955 if (Info.CaptureDeviceAddrMap.empty()) { 10956 CodeGen.setAction(NoPrivAction); 10957 CodeGen(CGF); 10958 } 10959 10960 if (IfCond) { 10961 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10962 } else { 10963 RegionCodeGenTy RCG(EndThenGen); 10964 RCG(CGF); 10965 } 10966 } 10967 10968 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10969 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10970 const Expr *Device) { 10971 if (!CGF.HaveInsertPoint()) 10972 return; 10973 10974 assert((isa<OMPTargetEnterDataDirective>(D) || 10975 isa<OMPTargetExitDataDirective>(D) || 10976 isa<OMPTargetUpdateDirective>(D)) && 10977 "Expecting either target enter, exit data, or update directives."); 10978 10979 CodeGenFunction::OMPTargetDataInfo InputInfo; 10980 llvm::Value *MapTypesArray = nullptr; 10981 llvm::Value *MapNamesArray = nullptr; 10982 // Generate the code for the opening of the data environment. 10983 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10984 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10985 // Emit device ID if any. 
10986 llvm::Value *DeviceID = nullptr; 10987 if (Device) { 10988 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10989 CGF.Int64Ty, /*isSigned=*/true); 10990 } else { 10991 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10992 } 10993 10994 // Emit the number of elements in the offloading arrays. 10995 llvm::Constant *PointerNum = 10996 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10997 10998 // Source location for the ident struct 10999 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11000 11001 llvm::Value *OffloadingArgs[] = {RTLoc, 11002 DeviceID, 11003 PointerNum, 11004 InputInfo.BasePointersArray.getPointer(), 11005 InputInfo.PointersArray.getPointer(), 11006 InputInfo.SizesArray.getPointer(), 11007 MapTypesArray, 11008 MapNamesArray, 11009 InputInfo.MappersArray.getPointer()}; 11010 11011 // Select the right runtime function call for each standalone 11012 // directive. 11013 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11014 RuntimeFunction RTLFn; 11015 switch (D.getDirectiveKind()) { 11016 case OMPD_target_enter_data: 11017 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11018 : OMPRTL___tgt_target_data_begin_mapper; 11019 break; 11020 case OMPD_target_exit_data: 11021 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11022 : OMPRTL___tgt_target_data_end_mapper; 11023 break; 11024 case OMPD_target_update: 11025 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 11026 : OMPRTL___tgt_target_data_update_mapper; 11027 break; 11028 case OMPD_parallel: 11029 case OMPD_for: 11030 case OMPD_parallel_for: 11031 case OMPD_parallel_master: 11032 case OMPD_parallel_sections: 11033 case OMPD_for_simd: 11034 case OMPD_parallel_for_simd: 11035 case OMPD_cancel: 11036 case OMPD_cancellation_point: 11037 case OMPD_ordered: 11038 case OMPD_threadprivate: 11039 case OMPD_allocate: 11040 case OMPD_task: 11041 case OMPD_simd: 11042 case OMPD_tile: 11043 case OMPD_sections: 11044 case OMPD_section: 11045 case OMPD_single: 11046 case OMPD_master: 11047 case OMPD_critical: 11048 case OMPD_taskyield: 11049 case OMPD_barrier: 11050 case OMPD_taskwait: 11051 case OMPD_taskgroup: 11052 case OMPD_atomic: 11053 case OMPD_flush: 11054 case OMPD_depobj: 11055 case OMPD_scan: 11056 case OMPD_teams: 11057 case OMPD_target_data: 11058 case OMPD_distribute: 11059 case OMPD_distribute_simd: 11060 case OMPD_distribute_parallel_for: 11061 case OMPD_distribute_parallel_for_simd: 11062 case OMPD_teams_distribute: 11063 case OMPD_teams_distribute_simd: 11064 case OMPD_teams_distribute_parallel_for: 11065 case OMPD_teams_distribute_parallel_for_simd: 11066 case OMPD_declare_simd: 11067 case OMPD_declare_variant: 11068 case OMPD_begin_declare_variant: 11069 case OMPD_end_declare_variant: 11070 case OMPD_declare_target: 11071 case OMPD_end_declare_target: 11072 case OMPD_declare_reduction: 11073 case OMPD_declare_mapper: 11074 case OMPD_taskloop: 11075 case OMPD_taskloop_simd: 11076 case OMPD_master_taskloop: 11077 case OMPD_master_taskloop_simd: 11078 case OMPD_parallel_master_taskloop: 11079 case OMPD_parallel_master_taskloop_simd: 11080 case OMPD_target: 11081 case OMPD_target_simd: 11082 case OMPD_target_teams_distribute: 11083 case OMPD_target_teams_distribute_simd: 11084 case OMPD_target_teams_distribute_parallel_for: 11085 case OMPD_target_teams_distribute_parallel_for_simd: 11086 case OMPD_target_teams: 11087 
case OMPD_target_parallel: 11088 case OMPD_target_parallel_for: 11089 case OMPD_target_parallel_for_simd: 11090 case OMPD_requires: 11091 case OMPD_unknown: 11092 default: 11093 llvm_unreachable("Unexpected standalone target data directive."); 11094 break; 11095 } 11096 CGF.EmitRuntimeCall( 11097 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11098 OffloadingArgs); 11099 }; 11100 11101 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11102 &MapNamesArray](CodeGenFunction &CGF, 11103 PrePostActionTy &) { 11104 // Fill up the arrays with all the mapped variables. 11105 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11106 11107 // Get map clause information. 11108 MappableExprsHandler MEHandler(D, CGF); 11109 MEHandler.generateAllInfo(CombinedInfo); 11110 11111 TargetDataInfo Info; 11112 // Fill up the arrays and create the arguments. 11113 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11114 /*IsNonContiguous=*/true); 11115 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11116 D.hasClausesOfKind<OMPNowaitClause>(); 11117 emitOffloadingArraysArgument( 11118 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11119 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11120 {/*ForEndTask=*/false}); 11121 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11122 InputInfo.BasePointersArray = 11123 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11124 InputInfo.PointersArray = 11125 Address(Info.PointersArray, CGM.getPointerAlign()); 11126 InputInfo.SizesArray = 11127 Address(Info.SizesArray, CGM.getPointerAlign()); 11128 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11129 MapTypesArray = Info.MapTypesArray; 11130 MapNamesArray = Info.MapNamesArray; 11131 if (RequiresOuterTask) 11132 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11133 else 11134 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11135 }; 11136 11137 
if (IfCond) { 11138 emitIfClause(CGF, IfCond, TargetThenGen, 11139 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11140 } else { 11141 RegionCodeGenTy ThenRCG(TargetThenGen); 11142 ThenRCG(CGF); 11143 } 11144 } 11145 11146 namespace { 11147 /// Kind of parameter in a function with 'declare simd' directive. 11148 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11149 /// Attribute set of the parameter. 11150 struct ParamAttrTy { 11151 ParamKindTy Kind = Vector; 11152 llvm::APSInt StrideOrArg; 11153 llvm::APSInt Alignment; 11154 }; 11155 } // namespace 11156 11157 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11158 ArrayRef<ParamAttrTy> ParamAttrs) { 11159 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11160 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11161 // of that clause. The VLEN value must be power of 2. 11162 // In other case the notion of the function`s "characteristic data type" (CDT) 11163 // is used to compute the vector length. 11164 // CDT is defined in the following order: 11165 // a) For non-void function, the CDT is the return type. 11166 // b) If the function has any non-uniform, non-linear parameters, then the 11167 // CDT is the type of the first such parameter. 11168 // c) If the CDT determined by a) or b) above is struct, union, or class 11169 // type which is pass-by-value (except for the type that maps to the 11170 // built-in complex data type), the characteristic data type is int. 11171 // d) If none of the above three cases is applicable, the CDT is int. 11172 // The VLEN is then determined based on the CDT and the size of vector 11173 // register of that ISA for which current vector version is generated. The 11174 // VLEN is computed using the formula below: 11175 // VLEN = sizeof(vector_register) / sizeof(CDT), 11176 // where vector register size specified in section 3.2.1 Registers and the 11177 // Stack Frame of original AMD64 ABI document. 
11178 QualType RetType = FD->getReturnType(); 11179 if (RetType.isNull()) 11180 return 0; 11181 ASTContext &C = FD->getASTContext(); 11182 QualType CDT; 11183 if (!RetType.isNull() && !RetType->isVoidType()) { 11184 CDT = RetType; 11185 } else { 11186 unsigned Offset = 0; 11187 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11188 if (ParamAttrs[Offset].Kind == Vector) 11189 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11190 ++Offset; 11191 } 11192 if (CDT.isNull()) { 11193 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11194 if (ParamAttrs[I + Offset].Kind == Vector) { 11195 CDT = FD->getParamDecl(I)->getType(); 11196 break; 11197 } 11198 } 11199 } 11200 } 11201 if (CDT.isNull()) 11202 CDT = C.IntTy; 11203 CDT = CDT->getCanonicalTypeUnqualified(); 11204 if (CDT->isRecordType() || CDT->isUnionType()) 11205 CDT = C.IntTy; 11206 return C.getTypeSize(CDT); 11207 } 11208 11209 static void 11210 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11211 const llvm::APSInt &VLENVal, 11212 ArrayRef<ParamAttrTy> ParamAttrs, 11213 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11214 struct ISADataTy { 11215 char ISA; 11216 unsigned VecRegSize; 11217 }; 11218 ISADataTy ISAData[] = { 11219 { 11220 'b', 128 11221 }, // SSE 11222 { 11223 'c', 256 11224 }, // AVX 11225 { 11226 'd', 256 11227 }, // AVX2 11228 { 11229 'e', 512 11230 }, // AVX512 11231 }; 11232 llvm::SmallVector<char, 2> Masked; 11233 switch (State) { 11234 case OMPDeclareSimdDeclAttr::BS_Undefined: 11235 Masked.push_back('N'); 11236 Masked.push_back('M'); 11237 break; 11238 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11239 Masked.push_back('N'); 11240 break; 11241 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11242 Masked.push_back('M'); 11243 break; 11244 } 11245 for (char Mask : Masked) { 11246 for (const ISADataTy &Data : ISAData) { 11247 SmallString<256> Buffer; 11248 llvm::raw_svector_ostream Out(Buffer); 11249 Out << "_ZGV" << Data.ISA << Mask; 11250 if (!VLENVal) 
{ 11251 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11252 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11253 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11254 } else { 11255 Out << VLENVal; 11256 } 11257 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11258 switch (ParamAttr.Kind){ 11259 case LinearWithVarStride: 11260 Out << 's' << ParamAttr.StrideOrArg; 11261 break; 11262 case Linear: 11263 Out << 'l'; 11264 if (ParamAttr.StrideOrArg != 1) 11265 Out << ParamAttr.StrideOrArg; 11266 break; 11267 case Uniform: 11268 Out << 'u'; 11269 break; 11270 case Vector: 11271 Out << 'v'; 11272 break; 11273 } 11274 if (!!ParamAttr.Alignment) 11275 Out << 'a' << ParamAttr.Alignment; 11276 } 11277 Out << '_' << Fn->getName(); 11278 Fn->addFnAttr(Out.str()); 11279 } 11280 } 11281 } 11282 11283 // This are the Functions that are needed to mangle the name of the 11284 // vector functions generated by the compiler, according to the rules 11285 // defined in the "Vector Function ABI specifications for AArch64", 11286 // available at 11287 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11288 11289 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11290 /// 11291 /// TODO: Need to implement the behavior for reference marked with a 11292 /// var or no linear modifiers (1.b in the section). For this, we 11293 /// need to extend ParamKindTy to support the linear modifiers. 
11294 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11295 QT = QT.getCanonicalType(); 11296 11297 if (QT->isVoidType()) 11298 return false; 11299 11300 if (Kind == ParamKindTy::Uniform) 11301 return false; 11302 11303 if (Kind == ParamKindTy::Linear) 11304 return false; 11305 11306 // TODO: Handle linear references with modifiers 11307 11308 if (Kind == ParamKindTy::LinearWithVarStride) 11309 return false; 11310 11311 return true; 11312 } 11313 11314 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11315 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11316 QT = QT.getCanonicalType(); 11317 unsigned Size = C.getTypeSize(QT); 11318 11319 // Only scalars and complex within 16 bytes wide set PVB to true. 11320 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11321 return false; 11322 11323 if (QT->isFloatingType()) 11324 return true; 11325 11326 if (QT->isIntegerType()) 11327 return true; 11328 11329 if (QT->isPointerType()) 11330 return true; 11331 11332 // TODO: Add support for complex types (section 3.1.2, item 2). 11333 11334 return false; 11335 } 11336 11337 /// Computes the lane size (LS) of a return type or of an input parameter, 11338 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11339 /// TODO: Add support for references, section 3.2.1, item 1. 11340 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11341 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11342 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11343 if (getAArch64PBV(PTy, C)) 11344 return C.getTypeSize(PTy); 11345 } 11346 if (getAArch64PBV(QT, C)) 11347 return C.getTypeSize(QT); 11348 11349 return C.getTypeSize(C.getUIntPtrType()); 11350 } 11351 11352 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11353 // signature of the scalar function, as defined in 3.2.2 of the 11354 // AAVFABI. 
11355 static std::tuple<unsigned, unsigned, bool> 11356 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11357 QualType RetType = FD->getReturnType().getCanonicalType(); 11358 11359 ASTContext &C = FD->getASTContext(); 11360 11361 bool OutputBecomesInput = false; 11362 11363 llvm::SmallVector<unsigned, 8> Sizes; 11364 if (!RetType->isVoidType()) { 11365 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11366 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11367 OutputBecomesInput = true; 11368 } 11369 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11370 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11371 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11372 } 11373 11374 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11375 // The LS of a function parameter / return value can only be a power 11376 // of 2, starting from 8 bits, up to 128. 11377 assert(std::all_of(Sizes.begin(), Sizes.end(), 11378 [](unsigned Size) { 11379 return Size == 8 || Size == 16 || Size == 32 || 11380 Size == 64 || Size == 128; 11381 }) && 11382 "Invalid size"); 11383 11384 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11385 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11386 OutputBecomesInput); 11387 } 11388 11389 /// Mangle the parameter part of the vector function name according to 11390 /// their OpenMP classification. The mangling function is defined in 11391 /// section 3.5 of the AAVFABI. 11392 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11393 SmallString<256> Buffer; 11394 llvm::raw_svector_ostream Out(Buffer); 11395 for (const auto &ParamAttr : ParamAttrs) { 11396 switch (ParamAttr.Kind) { 11397 case LinearWithVarStride: 11398 Out << "ls" << ParamAttr.StrideOrArg; 11399 break; 11400 case Linear: 11401 Out << 'l'; 11402 // Don't print the step value if it is not present or if it is 11403 // equal to 1. 
11404 if (ParamAttr.StrideOrArg != 1) 11405 Out << ParamAttr.StrideOrArg; 11406 break; 11407 case Uniform: 11408 Out << 'u'; 11409 break; 11410 case Vector: 11411 Out << 'v'; 11412 break; 11413 } 11414 11415 if (!!ParamAttr.Alignment) 11416 Out << 'a' << ParamAttr.Alignment; 11417 } 11418 11419 return std::string(Out.str()); 11420 } 11421 11422 // Function used to add the attribute. The parameter `VLEN` is 11423 // templated to allow the use of "x" when targeting scalable functions 11424 // for SVE. 11425 template <typename T> 11426 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11427 char ISA, StringRef ParSeq, 11428 StringRef MangledName, bool OutputBecomesInput, 11429 llvm::Function *Fn) { 11430 SmallString<256> Buffer; 11431 llvm::raw_svector_ostream Out(Buffer); 11432 Out << Prefix << ISA << LMask << VLEN; 11433 if (OutputBecomesInput) 11434 Out << "v"; 11435 Out << ParSeq << "_" << MangledName; 11436 Fn->addFnAttr(Out.str()); 11437 } 11438 11439 // Helper function to generate the Advanced SIMD names depending on 11440 // the value of the NDS when simdlen is not present. 
11441 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11442 StringRef Prefix, char ISA, 11443 StringRef ParSeq, StringRef MangledName, 11444 bool OutputBecomesInput, 11445 llvm::Function *Fn) { 11446 switch (NDS) { 11447 case 8: 11448 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11449 OutputBecomesInput, Fn); 11450 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11451 OutputBecomesInput, Fn); 11452 break; 11453 case 16: 11454 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11455 OutputBecomesInput, Fn); 11456 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11457 OutputBecomesInput, Fn); 11458 break; 11459 case 32: 11460 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11461 OutputBecomesInput, Fn); 11462 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11463 OutputBecomesInput, Fn); 11464 break; 11465 case 64: 11466 case 128: 11467 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11468 OutputBecomesInput, Fn); 11469 break; 11470 default: 11471 llvm_unreachable("Scalar type is too wide."); 11472 } 11473 } 11474 11475 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11476 static void emitAArch64DeclareSimdFunction( 11477 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11478 ArrayRef<ParamAttrTy> ParamAttrs, 11479 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11480 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11481 11482 // Get basic data for building the vector signature. 11483 const auto Data = getNDSWDS(FD, ParamAttrs); 11484 const unsigned NDS = std::get<0>(Data); 11485 const unsigned WDS = std::get<1>(Data); 11486 const bool OutputBecomesInput = std::get<2>(Data); 11487 11488 // Check the values provided via `simdlen` by the user. 11489 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11490 if (UserVLEN == 1) { 11491 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11492 DiagnosticsEngine::Warning, 11493 "The clause simdlen(1) has no effect when targeting aarch64."); 11494 CGM.getDiags().Report(SLoc, DiagID); 11495 return; 11496 } 11497 11498 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11499 // Advanced SIMD output. 11500 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11501 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11502 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11503 "power of 2 when targeting Advanced SIMD."); 11504 CGM.getDiags().Report(SLoc, DiagID); 11505 return; 11506 } 11507 11508 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11509 // limits. 11510 if (ISA == 's' && UserVLEN != 0) { 11511 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11512 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11513 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11514 "lanes in the architectural constraints " 11515 "for SVE (min is 128-bit, max is " 11516 "2048-bit, by steps of 128-bit)"); 11517 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11518 return; 11519 } 11520 } 11521 11522 // Sort out parameter sequence. 11523 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11524 StringRef Prefix = "_ZGV"; 11525 // Generate simdlen from user input (if any). 11526 if (UserVLEN) { 11527 if (ISA == 's') { 11528 // SVE generates only a masked function. 11529 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11530 OutputBecomesInput, Fn); 11531 } else { 11532 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11533 // Advanced SIMD generates one or two functions, depending on 11534 // the `[not]inbranch` clause. 
11535 switch (State) { 11536 case OMPDeclareSimdDeclAttr::BS_Undefined: 11537 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11538 OutputBecomesInput, Fn); 11539 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11540 OutputBecomesInput, Fn); 11541 break; 11542 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11543 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11544 OutputBecomesInput, Fn); 11545 break; 11546 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11547 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11548 OutputBecomesInput, Fn); 11549 break; 11550 } 11551 } 11552 } else { 11553 // If no user simdlen is provided, follow the AAVFABI rules for 11554 // generating the vector length. 11555 if (ISA == 's') { 11556 // SVE, section 3.4.1, item 1. 11557 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11558 OutputBecomesInput, Fn); 11559 } else { 11560 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11561 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11562 // two vector names depending on the use of the clause 11563 // `[not]inbranch`. 
11564 switch (State) { 11565 case OMPDeclareSimdDeclAttr::BS_Undefined: 11566 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11567 OutputBecomesInput, Fn); 11568 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11569 OutputBecomesInput, Fn); 11570 break; 11571 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11572 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11573 OutputBecomesInput, Fn); 11574 break; 11575 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11576 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11577 OutputBecomesInput, Fn); 11578 break; 11579 } 11580 } 11581 } 11582 } 11583 11584 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11585 llvm::Function *Fn) { 11586 ASTContext &C = CGM.getContext(); 11587 FD = FD->getMostRecentDecl(); 11588 // Map params to their positions in function decl. 11589 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11590 if (isa<CXXMethodDecl>(FD)) 11591 ParamPositions.try_emplace(FD, 0); 11592 unsigned ParamPos = ParamPositions.size(); 11593 for (const ParmVarDecl *P : FD->parameters()) { 11594 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11595 ++ParamPos; 11596 } 11597 while (FD) { 11598 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11599 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11600 // Mark uniform parameters. 11601 for (const Expr *E : Attr->uniforms()) { 11602 E = E->IgnoreParenImpCasts(); 11603 unsigned Pos; 11604 if (isa<CXXThisExpr>(E)) { 11605 Pos = ParamPositions[FD]; 11606 } else { 11607 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11608 ->getCanonicalDecl(); 11609 Pos = ParamPositions[PVD]; 11610 } 11611 ParamAttrs[Pos].Kind = Uniform; 11612 } 11613 // Get alignment info. 
11614 auto NI = Attr->alignments_begin(); 11615 for (const Expr *E : Attr->aligneds()) { 11616 E = E->IgnoreParenImpCasts(); 11617 unsigned Pos; 11618 QualType ParmTy; 11619 if (isa<CXXThisExpr>(E)) { 11620 Pos = ParamPositions[FD]; 11621 ParmTy = E->getType(); 11622 } else { 11623 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11624 ->getCanonicalDecl(); 11625 Pos = ParamPositions[PVD]; 11626 ParmTy = PVD->getType(); 11627 } 11628 ParamAttrs[Pos].Alignment = 11629 (*NI) 11630 ? (*NI)->EvaluateKnownConstInt(C) 11631 : llvm::APSInt::getUnsigned( 11632 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11633 .getQuantity()); 11634 ++NI; 11635 } 11636 // Mark linear parameters. 11637 auto SI = Attr->steps_begin(); 11638 auto MI = Attr->modifiers_begin(); 11639 for (const Expr *E : Attr->linears()) { 11640 E = E->IgnoreParenImpCasts(); 11641 unsigned Pos; 11642 // Rescaling factor needed to compute the linear parameter 11643 // value in the mangled name. 11644 unsigned PtrRescalingFactor = 1; 11645 if (isa<CXXThisExpr>(E)) { 11646 Pos = ParamPositions[FD]; 11647 } else { 11648 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11649 ->getCanonicalDecl(); 11650 Pos = ParamPositions[PVD]; 11651 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11652 PtrRescalingFactor = CGM.getContext() 11653 .getTypeSizeInChars(P->getPointeeType()) 11654 .getQuantity(); 11655 } 11656 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11657 ParamAttr.Kind = Linear; 11658 // Assuming a stride of 1, for `linear` without modifiers. 
11659 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11660 if (*SI) { 11661 Expr::EvalResult Result; 11662 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11663 if (const auto *DRE = 11664 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11665 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11666 ParamAttr.Kind = LinearWithVarStride; 11667 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11668 ParamPositions[StridePVD->getCanonicalDecl()]); 11669 } 11670 } 11671 } else { 11672 ParamAttr.StrideOrArg = Result.Val.getInt(); 11673 } 11674 } 11675 // If we are using a linear clause on a pointer, we need to 11676 // rescale the value of linear_step with the byte size of the 11677 // pointee type. 11678 if (Linear == ParamAttr.Kind) 11679 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11680 ++SI; 11681 ++MI; 11682 } 11683 llvm::APSInt VLENVal; 11684 SourceLocation ExprLoc; 11685 const Expr *VLENExpr = Attr->getSimdlen(); 11686 if (VLENExpr) { 11687 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11688 ExprLoc = VLENExpr->getExprLoc(); 11689 } 11690 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11691 if (CGM.getTriple().isX86()) { 11692 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11693 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11694 unsigned VLEN = VLENVal.getExtValue(); 11695 StringRef MangledName = Fn->getName(); 11696 if (CGM.getTarget().hasFeature("sve")) 11697 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11698 MangledName, 's', 128, Fn, ExprLoc); 11699 if (CGM.getTarget().hasFeature("neon")) 11700 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11701 MangledName, 'n', 128, Fn, ExprLoc); 11702 } 11703 } 11704 FD = FD->getPreviousDecl(); 11705 } 11706 } 11707 11708 namespace { 11709 /// Cleanup action for doacross support. 
11710 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11711 public: 11712 static const int DoacrossFinArgs = 2; 11713 11714 private: 11715 llvm::FunctionCallee RTLFn; 11716 llvm::Value *Args[DoacrossFinArgs]; 11717 11718 public: 11719 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11720 ArrayRef<llvm::Value *> CallArgs) 11721 : RTLFn(RTLFn) { 11722 assert(CallArgs.size() == DoacrossFinArgs); 11723 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11724 } 11725 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11726 if (!CGF.HaveInsertPoint()) 11727 return; 11728 CGF.EmitRuntimeCall(RTLFn, Args); 11729 } 11730 }; 11731 } // namespace 11732 11733 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11734 const OMPLoopDirective &D, 11735 ArrayRef<Expr *> NumIterations) { 11736 if (!CGF.HaveInsertPoint()) 11737 return; 11738 11739 ASTContext &C = CGM.getContext(); 11740 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11741 RecordDecl *RD; 11742 if (KmpDimTy.isNull()) { 11743 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11744 // kmp_int64 lo; // lower 11745 // kmp_int64 up; // upper 11746 // kmp_int64 st; // stride 11747 // }; 11748 RD = C.buildImplicitRecord("kmp_dim"); 11749 RD->startDefinition(); 11750 addFieldToRecordDecl(C, RD, Int64Ty); 11751 addFieldToRecordDecl(C, RD, Int64Ty); 11752 addFieldToRecordDecl(C, RD, Int64Ty); 11753 RD->completeDefinition(); 11754 KmpDimTy = C.getRecordType(RD); 11755 } else { 11756 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11757 } 11758 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11759 QualType ArrayTy = 11760 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11761 11762 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11763 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11764 enum { LowerFD = 0, UpperFD, StrideFD }; 11765 // Fill dims with data. 
11766 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11767 LValue DimsLVal = CGF.MakeAddrLValue( 11768 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11769 // dims.upper = num_iterations; 11770 LValue UpperLVal = CGF.EmitLValueForField( 11771 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11772 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11773 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11774 Int64Ty, NumIterations[I]->getExprLoc()); 11775 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11776 // dims.stride = 1; 11777 LValue StrideLVal = CGF.EmitLValueForField( 11778 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11779 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11780 StrideLVal); 11781 } 11782 11783 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11784 // kmp_int32 num_dims, struct kmp_dim * dims); 11785 llvm::Value *Args[] = { 11786 emitUpdateLocation(CGF, D.getBeginLoc()), 11787 getThreadID(CGF, D.getBeginLoc()), 11788 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11789 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11790 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11791 CGM.VoidPtrTy)}; 11792 11793 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11794 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11795 CGF.EmitRuntimeCall(RTLFn, Args); 11796 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11797 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11798 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11799 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11800 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11801 llvm::makeArrayRef(FiniArgs)); 11802 } 11803 11804 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11805 const OMPDependClause *C) { 11806 QualType Int64Ty = 11807 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11808 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11809 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11810 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11811 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11812 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11813 const Expr *CounterVal = C->getLoopData(I); 11814 assert(CounterVal); 11815 llvm::Value *CntVal = CGF.EmitScalarConversion( 11816 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11817 CounterVal->getExprLoc()); 11818 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11819 /*Volatile=*/false, Int64Ty); 11820 } 11821 llvm::Value *Args[] = { 11822 emitUpdateLocation(CGF, C->getBeginLoc()), 11823 getThreadID(CGF, C->getBeginLoc()), 11824 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11825 llvm::FunctionCallee RTLFn; 11826 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11827 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11828 OMPRTL___kmpc_doacross_post); 11829 } else { 11830 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11831 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11832 OMPRTL___kmpc_doacross_wait); 11833 } 11834 CGF.EmitRuntimeCall(RTLFn, Args); 11835 } 11836 11837 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11838 llvm::FunctionCallee Callee, 11839 ArrayRef<llvm::Value *> Args) const { 11840 assert(Loc.isValid() && "Outlined function call location must be valid."); 11841 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11842 11843 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11844 if (Fn->doesNotThrow()) { 11845 CGF.EmitNounwindRuntimeCall(Fn, Args); 11846 return; 11847 } 11848 } 11849 CGF.EmitRuntimeCall(Callee, Args); 11850 } 11851 11852 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11853 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11854 ArrayRef<llvm::Value *> Args) const { 11855 emitCall(CGF, Loc, OutlinedFn, Args); 11856 } 11857 11858 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11859 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11860 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11861 HasEmittedDeclareTargetRegion = true; 11862 } 11863 11864 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11865 const VarDecl *NativeParam, 11866 const VarDecl *TargetParam) const { 11867 return CGF.GetAddrOfLocalVar(NativeParam); 11868 } 11869 11870 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11871 const VarDecl *VD) { 11872 if (!VD) 11873 return Address::invalid(); 11874 Address UntiedAddr = Address::invalid(); 11875 Address UntiedRealAddr = Address::invalid(); 11876 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11877 if (It != FunctionToUntiedTaskStackMap.end()) { 11878 const UntiedLocalVarsAddressesMap &UntiedData = 11879 UntiedLocalVarsStack[It->second]; 11880 auto I = UntiedData.find(VD); 11881 if (I != UntiedData.end()) { 11882 UntiedAddr = I->second.first; 11883 UntiedRealAddr = I->second.second; 11884 } 11885 } 11886 const VarDecl *CVD = VD->getCanonicalDecl(); 11887 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11888 // Use the default allocation. 
11889 if (!isAllocatableDecl(VD)) 11890 return UntiedAddr; 11891 llvm::Value *Size; 11892 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11893 if (CVD->getType()->isVariablyModifiedType()) { 11894 Size = CGF.getTypeSize(CVD->getType()); 11895 // Align the size: ((size + align - 1) / align) * align 11896 Size = CGF.Builder.CreateNUWAdd( 11897 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11898 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11899 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11900 } else { 11901 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11902 Size = CGM.getSize(Sz.alignTo(Align)); 11903 } 11904 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11905 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11906 assert(AA->getAllocator() && 11907 "Expected allocator expression for non-default allocator."); 11908 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11909 // According to the standard, the original allocator type is a enum 11910 // (integer). Convert to pointer type, if required. 
11911 Allocator = CGF.EmitScalarConversion( 11912 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 11913 AA->getAllocator()->getExprLoc()); 11914 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11915 11916 llvm::Value *Addr = 11917 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11918 CGM.getModule(), OMPRTL___kmpc_alloc), 11919 Args, getName({CVD->getName(), ".void.addr"})); 11920 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11921 CGM.getModule(), OMPRTL___kmpc_free); 11922 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11923 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11924 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11925 if (UntiedAddr.isValid()) 11926 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11927 11928 // Cleanup action for allocate support. 11929 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11930 llvm::FunctionCallee RTLFn; 11931 unsigned LocEncoding; 11932 Address Addr; 11933 const Expr *Allocator; 11934 11935 public: 11936 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, 11937 Address Addr, const Expr *Allocator) 11938 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11939 Allocator(Allocator) {} 11940 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11941 if (!CGF.HaveInsertPoint()) 11942 return; 11943 llvm::Value *Args[3]; 11944 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 11945 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 11946 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11947 Addr.getPointer(), CGF.VoidPtrTy); 11948 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 11949 // According to the standard, the original allocator type is a enum 11950 // (integer). Convert to pointer type, if required. 
11951 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 11952 CGF.getContext().VoidPtrTy, 11953 Allocator->getExprLoc()); 11954 Args[2] = AllocVal; 11955 11956 CGF.EmitRuntimeCall(RTLFn, Args); 11957 } 11958 }; 11959 Address VDAddr = 11960 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 11961 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11962 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11963 VDAddr, AA->getAllocator()); 11964 if (UntiedRealAddr.isValid()) 11965 if (auto *Region = 11966 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11967 Region->emitUntiedSwitch(CGF); 11968 return VDAddr; 11969 } 11970 return UntiedAddr; 11971 } 11972 11973 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11974 const VarDecl *VD) const { 11975 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11976 if (It == FunctionToUntiedTaskStackMap.end()) 11977 return false; 11978 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11979 } 11980 11981 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11982 CodeGenModule &CGM, const OMPLoopDirective &S) 11983 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11984 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11985 if (!NeedToPush) 11986 return; 11987 NontemporalDeclsSet &DS = 11988 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11989 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11990 for (const Stmt *Ref : C->private_refs()) { 11991 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11992 const ValueDecl *VD; 11993 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11994 VD = DRE->getDecl(); 11995 } else { 11996 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11997 assert((ME->isImplicitCXXThis() || 11998 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11999 "Expected member of current class."); 12000 VD = 
ME->getMemberDecl(); 12001 } 12002 DS.insert(VD); 12003 } 12004 } 12005 } 12006 12007 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12008 if (!NeedToPush) 12009 return; 12010 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12011 } 12012 12013 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12014 CodeGenFunction &CGF, 12015 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 12016 std::pair<Address, Address>> &LocalVars) 12017 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12018 if (!NeedToPush) 12019 return; 12020 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12021 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12022 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12023 } 12024 12025 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12026 if (!NeedToPush) 12027 return; 12028 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12029 } 12030 12031 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12032 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12033 12034 return llvm::any_of( 12035 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12036 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12037 } 12038 12039 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12040 const OMPExecutableDirective &S, 12041 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12042 const { 12043 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12044 // Vars in target/task regions must be excluded completely. 
12045 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12046 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12047 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12048 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12049 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12050 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12051 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12052 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12053 } 12054 } 12055 // Exclude vars in private clauses. 12056 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12057 for (const Expr *Ref : C->varlists()) { 12058 if (!Ref->getType()->isScalarType()) 12059 continue; 12060 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12061 if (!DRE) 12062 continue; 12063 NeedToCheckForLPCs.insert(DRE->getDecl()); 12064 } 12065 } 12066 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12067 for (const Expr *Ref : C->varlists()) { 12068 if (!Ref->getType()->isScalarType()) 12069 continue; 12070 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12071 if (!DRE) 12072 continue; 12073 NeedToCheckForLPCs.insert(DRE->getDecl()); 12074 } 12075 } 12076 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12077 for (const Expr *Ref : C->varlists()) { 12078 if (!Ref->getType()->isScalarType()) 12079 continue; 12080 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12081 if (!DRE) 12082 continue; 12083 NeedToCheckForLPCs.insert(DRE->getDecl()); 12084 } 12085 } 12086 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12087 for (const Expr *Ref : C->varlists()) { 12088 if (!Ref->getType()->isScalarType()) 12089 continue; 12090 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12091 if (!DRE) 12092 continue; 12093 NeedToCheckForLPCs.insert(DRE->getDecl()); 12094 } 12095 } 12096 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12097 for (const Expr *Ref : C->varlists()) { 12098 if (!Ref->getType()->isScalarType()) 12099 continue; 12100 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12101 if (!DRE) 12102 continue; 12103 NeedToCheckForLPCs.insert(DRE->getDecl()); 12104 } 12105 } 12106 for (const Decl *VD : NeedToCheckForLPCs) { 12107 for (const LastprivateConditionalData &Data : 12108 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12109 if (Data.DeclToUniqueName.count(VD) > 0) { 12110 if (!Data.Disabled) 12111 NeedToAddForLPCsAsDisabled.insert(VD); 12112 break; 12113 } 12114 } 12115 } 12116 } 12117 12118 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12119 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12120 : CGM(CGF.CGM), 12121 Action((CGM.getLangOpts().OpenMP >= 50 && 12122 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12123 [](const OMPLastprivateClause *C) { 12124 return C->getKind() == 12125 OMPC_LASTPRIVATE_conditional; 12126 })) 12127 ? 
ActionToDo::PushAsLastprivateConditional 12128 : ActionToDo::DoNotPush) { 12129 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12130 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12131 return; 12132 assert(Action == ActionToDo::PushAsLastprivateConditional && 12133 "Expected a push action."); 12134 LastprivateConditionalData &Data = 12135 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12136 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12137 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12138 continue; 12139 12140 for (const Expr *Ref : C->varlists()) { 12141 Data.DeclToUniqueName.insert(std::make_pair( 12142 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12143 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12144 } 12145 } 12146 Data.IVLVal = IVLVal; 12147 Data.Fn = CGF.CurFn; 12148 } 12149 12150 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12151 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12152 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12153 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12154 if (CGM.getLangOpts().OpenMP < 50) 12155 return; 12156 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12157 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12158 if (!NeedToAddForLPCsAsDisabled.empty()) { 12159 Action = ActionToDo::DisableLastprivateConditional; 12160 LastprivateConditionalData &Data = 12161 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12162 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12163 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12164 Data.Fn = CGF.CurFn; 12165 Data.Disabled = true; 12166 } 12167 } 12168 12169 CGOpenMPRuntime::LastprivateConditionalRAII 12170 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12171 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12172 return 
LastprivateConditionalRAII(CGF, S); 12173 } 12174 12175 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12176 if (CGM.getLangOpts().OpenMP < 50) 12177 return; 12178 if (Action == ActionToDo::DisableLastprivateConditional) { 12179 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12180 "Expected list of disabled private vars."); 12181 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12182 } 12183 if (Action == ActionToDo::PushAsLastprivateConditional) { 12184 assert( 12185 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12186 "Expected list of lastprivate conditional vars."); 12187 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12188 } 12189 } 12190 12191 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12192 const VarDecl *VD) { 12193 ASTContext &C = CGM.getContext(); 12194 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12195 if (I == LastprivateConditionalToTypes.end()) 12196 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12197 QualType NewType; 12198 const FieldDecl *VDField; 12199 const FieldDecl *FiredField; 12200 LValue BaseLVal; 12201 auto VI = I->getSecond().find(VD); 12202 if (VI == I->getSecond().end()) { 12203 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12204 RD->startDefinition(); 12205 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12206 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12207 RD->completeDefinition(); 12208 NewType = C.getRecordType(RD); 12209 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12210 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12211 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12212 } else { 12213 NewType = std::get<0>(VI->getSecond()); 12214 VDField = std::get<1>(VI->getSecond()); 12215 FiredField = std::get<2>(VI->getSecond()); 
12216 BaseLVal = std::get<3>(VI->getSecond()); 12217 } 12218 LValue FiredLVal = 12219 CGF.EmitLValueForField(BaseLVal, FiredField); 12220 CGF.EmitStoreOfScalar( 12221 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12222 FiredLVal); 12223 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12224 } 12225 12226 namespace { 12227 /// Checks if the lastprivate conditional variable is referenced in LHS. 12228 class LastprivateConditionalRefChecker final 12229 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12230 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12231 const Expr *FoundE = nullptr; 12232 const Decl *FoundD = nullptr; 12233 StringRef UniqueDeclName; 12234 LValue IVLVal; 12235 llvm::Function *FoundFn = nullptr; 12236 SourceLocation Loc; 12237 12238 public: 12239 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12240 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12241 llvm::reverse(LPM)) { 12242 auto It = D.DeclToUniqueName.find(E->getDecl()); 12243 if (It == D.DeclToUniqueName.end()) 12244 continue; 12245 if (D.Disabled) 12246 return false; 12247 FoundE = E; 12248 FoundD = E->getDecl()->getCanonicalDecl(); 12249 UniqueDeclName = It->second; 12250 IVLVal = D.IVLVal; 12251 FoundFn = D.Fn; 12252 break; 12253 } 12254 return FoundE == E; 12255 } 12256 bool VisitMemberExpr(const MemberExpr *E) { 12257 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12258 return false; 12259 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12260 llvm::reverse(LPM)) { 12261 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12262 if (It == D.DeclToUniqueName.end()) 12263 continue; 12264 if (D.Disabled) 12265 return false; 12266 FoundE = E; 12267 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12268 UniqueDeclName = It->second; 12269 IVLVal = D.IVLVal; 12270 FoundFn = D.Fn; 12271 break; 12272 } 12273 return FoundE == E; 12274 } 12275 bool VisitStmt(const Stmt *S) { 12276 for (const Stmt 
*Child : S->children()) { 12277 if (!Child) 12278 continue; 12279 if (const auto *E = dyn_cast<Expr>(Child)) 12280 if (!E->isGLValue()) 12281 continue; 12282 if (Visit(Child)) 12283 return true; 12284 } 12285 return false; 12286 } 12287 explicit LastprivateConditionalRefChecker( 12288 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12289 : LPM(LPM) {} 12290 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12291 getFoundData() const { 12292 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12293 } 12294 }; 12295 } // namespace 12296 12297 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12298 LValue IVLVal, 12299 StringRef UniqueDeclName, 12300 LValue LVal, 12301 SourceLocation Loc) { 12302 // Last updated loop counter for the lastprivate conditional var. 12303 // int<xx> last_iv = 0; 12304 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12305 llvm::Constant *LastIV = 12306 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12307 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12308 IVLVal.getAlignment().getAsAlign()); 12309 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12310 12311 // Last value of the lastprivate conditional. 12312 // decltype(priv_a) last_a; 12313 llvm::Constant *Last = getOrCreateInternalVariable( 12314 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12315 cast<llvm::GlobalVariable>(Last)->setAlignment( 12316 LVal.getAlignment().getAsAlign()); 12317 LValue LastLVal = 12318 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12319 12320 // Global loop counter. Required to handle inner parallel-for regions. 
12321 // iv 12322 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12323 12324 // #pragma omp critical(a) 12325 // if (last_iv <= iv) { 12326 // last_iv = iv; 12327 // last_a = priv_a; 12328 // } 12329 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12330 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12331 Action.Enter(CGF); 12332 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12333 // (last_iv <= iv) ? Check if the variable is updated and store new 12334 // value in global var. 12335 llvm::Value *CmpRes; 12336 if (IVLVal.getType()->isSignedIntegerType()) { 12337 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12338 } else { 12339 assert(IVLVal.getType()->isUnsignedIntegerType() && 12340 "Loop iteration variable must be integer."); 12341 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12342 } 12343 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12344 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12345 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12346 // { 12347 CGF.EmitBlock(ThenBB); 12348 12349 // last_iv = iv; 12350 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12351 12352 // last_a = priv_a; 12353 switch (CGF.getEvaluationKind(LVal.getType())) { 12354 case TEK_Scalar: { 12355 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12356 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12357 break; 12358 } 12359 case TEK_Complex: { 12360 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12361 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12362 break; 12363 } 12364 case TEK_Aggregate: 12365 llvm_unreachable( 12366 "Aggregates are not supported in lastprivate conditional."); 12367 } 12368 // } 12369 CGF.EmitBranch(ExitBB); 12370 // There is no need to emit line number for unconditional branch. 
12371 (void)ApplyDebugLocation::CreateEmpty(CGF); 12372 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12373 }; 12374 12375 if (CGM.getLangOpts().OpenMPSimd) { 12376 // Do not emit as a critical region as no parallel region could be emitted. 12377 RegionCodeGenTy ThenRCG(CodeGen); 12378 ThenRCG(CGF); 12379 } else { 12380 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12381 } 12382 } 12383 12384 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12385 const Expr *LHS) { 12386 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12387 return; 12388 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12389 if (!Checker.Visit(LHS)) 12390 return; 12391 const Expr *FoundE; 12392 const Decl *FoundD; 12393 StringRef UniqueDeclName; 12394 LValue IVLVal; 12395 llvm::Function *FoundFn; 12396 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12397 Checker.getFoundData(); 12398 if (FoundFn != CGF.CurFn) { 12399 // Special codegen for inner parallel regions. 
12400 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12401 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12402 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12403 "Lastprivate conditional is not found in outer region."); 12404 QualType StructTy = std::get<0>(It->getSecond()); 12405 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12406 LValue PrivLVal = CGF.EmitLValue(FoundE); 12407 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12408 PrivLVal.getAddress(CGF), 12409 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12410 LValue BaseLVal = 12411 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12412 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12413 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12414 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12415 FiredLVal, llvm::AtomicOrdering::Unordered, 12416 /*IsVolatile=*/true, /*isInit=*/false); 12417 return; 12418 } 12419 12420 // Private address of the lastprivate conditional in the current context. 
12421 // priv_a 12422 LValue LVal = CGF.EmitLValue(FoundE); 12423 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12424 FoundE->getExprLoc()); 12425 } 12426 12427 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12428 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12429 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12430 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12431 return; 12432 auto Range = llvm::reverse(LastprivateConditionalStack); 12433 auto It = llvm::find_if( 12434 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12435 if (It == Range.end() || It->Fn != CGF.CurFn) 12436 return; 12437 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12438 assert(LPCI != LastprivateConditionalToTypes.end() && 12439 "Lastprivates must be registered already."); 12440 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12441 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12442 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12443 for (const auto &Pair : It->DeclToUniqueName) { 12444 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12445 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12446 continue; 12447 auto I = LPCI->getSecond().find(Pair.first); 12448 assert(I != LPCI->getSecond().end() && 12449 "Lastprivate must be rehistered already."); 12450 // bool Cmp = priv_a.Fired != 0; 12451 LValue BaseLVal = std::get<3>(I->getSecond()); 12452 LValue FiredLVal = 12453 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12454 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12455 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12456 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12457 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12458 // if (Cmp) { 12459 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12460 CGF.EmitBlock(ThenBB); 
12461 Address Addr = CGF.GetAddrOfLocalVar(VD); 12462 LValue LVal; 12463 if (VD->getType()->isReferenceType()) 12464 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12465 AlignmentSource::Decl); 12466 else 12467 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12468 AlignmentSource::Decl); 12469 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12470 D.getBeginLoc()); 12471 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12472 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12473 // } 12474 } 12475 } 12476 12477 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12478 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12479 SourceLocation Loc) { 12480 if (CGF.getLangOpts().OpenMP < 50) 12481 return; 12482 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12483 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12484 "Unknown lastprivate conditional variable."); 12485 StringRef UniqueName = It->second; 12486 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12487 // The variable was not updated in the region - exit. 
12488 if (!GV) 12489 return; 12490 LValue LPLVal = CGF.MakeAddrLValue( 12491 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12492 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12493 CGF.EmitStoreOfScalar(Res, PrivLVal); 12494 } 12495 12496 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12497 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12498 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12499 llvm_unreachable("Not supported in SIMD-only mode"); 12500 } 12501 12502 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12503 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12504 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12505 llvm_unreachable("Not supported in SIMD-only mode"); 12506 } 12507 12508 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12509 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12510 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12511 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12512 bool Tied, unsigned &NumberOfParts) { 12513 llvm_unreachable("Not supported in SIMD-only mode"); 12514 } 12515 12516 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12517 SourceLocation Loc, 12518 llvm::Function *OutlinedFn, 12519 ArrayRef<llvm::Value *> CapturedVars, 12520 const Expr *IfCond) { 12521 llvm_unreachable("Not supported in SIMD-only mode"); 12522 } 12523 12524 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12525 CodeGenFunction &CGF, StringRef CriticalName, 12526 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12527 const Expr *Hint) { 12528 llvm_unreachable("Not supported in SIMD-only mode"); 12529 } 12530 12531 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12532 const RegionCodeGenTy &MasterOpGen, 12533 SourceLocation Loc) { 12534 llvm_unreachable("Not supported in SIMD-only mode"); 12535 } 12536 12537 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12538 SourceLocation Loc) { 12539 llvm_unreachable("Not supported in SIMD-only mode"); 12540 } 12541 12542 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12543 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12544 SourceLocation Loc) { 12545 llvm_unreachable("Not supported in SIMD-only mode"); 12546 } 12547 12548 void CGOpenMPSIMDRuntime::emitSingleRegion( 12549 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12550 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12551 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12552 ArrayRef<const Expr *> AssignmentOps) { 12553 llvm_unreachable("Not supported in SIMD-only mode"); 12554 } 12555 12556 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12557 const RegionCodeGenTy &OrderedOpGen, 12558 SourceLocation Loc, 12559 bool IsThreads) { 12560 llvm_unreachable("Not supported in SIMD-only mode"); 12561 } 12562 12563 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12564 SourceLocation Loc, 12565 OpenMPDirectiveKind Kind, 12566 bool EmitChecks, 12567 bool ForceSimpleCall) { 12568 llvm_unreachable("Not supported in SIMD-only mode"); 12569 } 12570 12571 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12572 CodeGenFunction &CGF, SourceLocation Loc, 12573 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12574 bool Ordered, const DispatchRTInput &DispatchValues) { 12575 llvm_unreachable("Not supported in SIMD-only mode"); 12576 } 12577 12578 void CGOpenMPSIMDRuntime::emitForStaticInit( 12579 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12580 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12581 llvm_unreachable("Not supported in SIMD-only mode"); 12582 } 12583 12584 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12585 CodeGenFunction &CGF, SourceLocation Loc, 12586 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12587 llvm_unreachable("Not supported in SIMD-only mode"); 12588 } 12589 12590 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12591 SourceLocation Loc, 12592 unsigned IVSize, 12593 bool IVSigned) { 12594 llvm_unreachable("Not supported in SIMD-only mode"); 12595 } 12596 12597 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12598 SourceLocation Loc, 12599 OpenMPDirectiveKind DKind) { 12600 llvm_unreachable("Not supported in SIMD-only mode"); 12601 } 12602 12603 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12604 SourceLocation Loc, 12605 unsigned IVSize, bool IVSigned, 12606 Address IL, Address LB, 12607 Address UB, Address ST) { 12608 llvm_unreachable("Not supported in SIMD-only mode"); 12609 } 12610 12611 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12612 llvm::Value *NumThreads, 12613 SourceLocation Loc) { 12614 llvm_unreachable("Not supported in SIMD-only mode"); 12615 } 12616 12617 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12618 ProcBindKind ProcBind, 12619 SourceLocation Loc) { 12620 llvm_unreachable("Not supported in SIMD-only mode"); 12621 } 12622 12623 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12624 const VarDecl *VD, 12625 Address VDAddr, 12626 SourceLocation Loc) { 12627 llvm_unreachable("Not supported in SIMD-only mode"); 12628 } 12629 12630 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12631 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12632 CodeGenFunction *CGF) { 12633 llvm_unreachable("Not supported in SIMD-only mode"); 12634 } 12635 12636 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12637 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12638 llvm_unreachable("Not supported in SIMD-only mode"); 12639 } 12640 12641 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12642 ArrayRef<const Expr *> 
Vars, 12643 SourceLocation Loc, 12644 llvm::AtomicOrdering AO) { 12645 llvm_unreachable("Not supported in SIMD-only mode"); 12646 } 12647 12648 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12649 const OMPExecutableDirective &D, 12650 llvm::Function *TaskFunction, 12651 QualType SharedsTy, Address Shareds, 12652 const Expr *IfCond, 12653 const OMPTaskDataTy &Data) { 12654 llvm_unreachable("Not supported in SIMD-only mode"); 12655 } 12656 12657 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12658 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12659 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12660 const Expr *IfCond, const OMPTaskDataTy &Data) { 12661 llvm_unreachable("Not supported in SIMD-only mode"); 12662 } 12663 12664 void CGOpenMPSIMDRuntime::emitReduction( 12665 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12666 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12667 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12668 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12669 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12670 ReductionOps, Options); 12671 } 12672 12673 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12674 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12675 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12676 llvm_unreachable("Not supported in SIMD-only mode"); 12677 } 12678 12679 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12680 SourceLocation Loc, 12681 bool IsWorksharingReduction) { 12682 llvm_unreachable("Not supported in SIMD-only mode"); 12683 } 12684 12685 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12686 SourceLocation Loc, 12687 ReductionCodeGen &RCG, 12688 unsigned N) { 12689 llvm_unreachable("Not supported in SIMD-only mode"); 12690 } 12691 
12692 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12693 SourceLocation Loc, 12694 llvm::Value *ReductionsPtr, 12695 LValue SharedLVal) { 12696 llvm_unreachable("Not supported in SIMD-only mode"); 12697 } 12698 12699 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12700 SourceLocation Loc) { 12701 llvm_unreachable("Not supported in SIMD-only mode"); 12702 } 12703 12704 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12705 CodeGenFunction &CGF, SourceLocation Loc, 12706 OpenMPDirectiveKind CancelRegion) { 12707 llvm_unreachable("Not supported in SIMD-only mode"); 12708 } 12709 12710 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12711 SourceLocation Loc, const Expr *IfCond, 12712 OpenMPDirectiveKind CancelRegion) { 12713 llvm_unreachable("Not supported in SIMD-only mode"); 12714 } 12715 12716 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12717 const OMPExecutableDirective &D, StringRef ParentName, 12718 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12719 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12720 llvm_unreachable("Not supported in SIMD-only mode"); 12721 } 12722 12723 void CGOpenMPSIMDRuntime::emitTargetCall( 12724 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12725 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12726 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12727 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12728 const OMPLoopDirective &D)> 12729 SizeEmitter) { 12730 llvm_unreachable("Not supported in SIMD-only mode"); 12731 } 12732 12733 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12734 llvm_unreachable("Not supported in SIMD-only mode"); 12735 } 12736 12737 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12738 llvm_unreachable("Not supported in SIMD-only mode"); 12739 } 12740 12741 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12742 return false; 12743 } 12744 12745 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12746 const OMPExecutableDirective &D, 12747 SourceLocation Loc, 12748 llvm::Function *OutlinedFn, 12749 ArrayRef<llvm::Value *> CapturedVars) { 12750 llvm_unreachable("Not supported in SIMD-only mode"); 12751 } 12752 12753 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12754 const Expr *NumTeams, 12755 const Expr *ThreadLimit, 12756 SourceLocation Loc) { 12757 llvm_unreachable("Not supported in SIMD-only mode"); 12758 } 12759 12760 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12761 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12762 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12763 llvm_unreachable("Not supported in SIMD-only mode"); 12764 } 12765 12766 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12767 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12768 const Expr *Device) { 12769 llvm_unreachable("Not supported in SIMD-only mode"); 12770 } 12771 12772 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12773 const OMPLoopDirective &D, 12774 ArrayRef<Expr *> NumIterations) { 12775 llvm_unreachable("Not supported in SIMD-only mode"); 12776 } 12777 12778 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12779 const OMPDependClause *C) { 12780 llvm_unreachable("Not supported in SIMD-only mode"); 12781 } 12782 12783 const VarDecl * 12784 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12785 const VarDecl *NativeParam) const { 12786 llvm_unreachable("Not supported in SIMD-only mode"); 12787 } 12788 12789 Address 12790 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12791 const VarDecl *NativeParam, 12792 const VarDecl *TargetParam) const { 12793 llvm_unreachable("Not supported in SIMD-only mode"); 12794 } 12795