//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Subclasses select one of the CGOpenMPRegionKind strategies below; the
/// kind tag is what classof() dispatch in the subclasses keys on.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info for an outlined region backed by a captured
  /// statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info with no captured statement (used for inlined
  /// regions, which delegate captures to the enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the re-entry switch for untied tasks. No-op by default; only the
  /// task-outlined region (and inlined regions forwarding to it) override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  // LLVM-style RTTI: any CGCapturedStmtInfo tagged CR_OpenMP is one of ours.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
/// Used for outlined 'parallel' regions; the thread-id variable is required.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the re-entry switch machinery for untied
  /// tasks. For tied tasks (Untied == false) every member is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the task part id (the switch selector).
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    /// The switch emitted in Enter(); each task part adds a case to it.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the part id and switch on it.
        // Unknown values fall through to the done block, which returns.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 is the initial entry into the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the id of the NEXT part (current case count) so the task is
        // resumed at the right block; the matching case is added below.
        // NOTE: the store must precede addCase, since addCase bumps the count.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // Yield: leave the outlined function, then register the resume block
        // as a new switch case.
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info when one exists; otherwise they either return null or are
/// unreachable, depending on whether a fallback makes sense.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder RegionCodeGenTy callback for expression-only regions; must
/// never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already usable directly; only non-local
      // (global) captures need privatization.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and restores the previous
/// CapturedStmtInfo (and, optionally, lambda/block capture state) on
/// destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, swapped out when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda/block capture state is hidden from
  /// the inlined region for the lifetime of this object.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Same value as OMP_IDENT_BARRIER_IMPL, mirroring kmp.h.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook as an EH-scope cleanup. Skipped when the
  /// builder has no insertion point (i.e. the code is unreachable).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored codegen callback inside a fresh cleanups scope. If a
/// pre/post action is attached, its Exit() is pushed as a cleanup so it runs
/// on both normal and EH paths; otherwise a no-op action is passed through.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match: CallExpr whose callee is an OpaqueValueExpr wrapping a
  // DeclRefExpr to an OMPDeclareReductionDecl. Anything else yields null.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Initialize \p Private from the initializer of the user-defined reduction
/// \p DRD. If the UDR has an explicit initializer expression, \p InitOp is
/// evaluated with its LHS/RHS operands remapped to \p Private / \p Original;
/// otherwise \p Private is filled from a private null-constant global of
/// type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the initializer's placeholder variables to the actual private
    // and original storage before evaluating InitOp.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is the UDR initializer function.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: default-initialize from a private constant
    // global holding the null value of Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global lvalue; no rvalue
      // round-trip is needed, so return early.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
/// declare-reduction initializer \p DRD; otherwise evaluate \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array.
678 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 679 QualType Type, bool EmitDeclareReductionInit, 680 const Expr *Init, 681 const OMPDeclareReductionDecl *DRD, 682 Address SrcAddr = Address::invalid()) { 683 // Perform element-by-element initialization. 684 QualType ElementTy; 685 686 // Drill down to the base element type on both arrays. 687 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 688 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 689 DestAddr = 690 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 691 if (DRD) 692 SrcAddr = 693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 694 695 llvm::Value *SrcBegin = nullptr; 696 if (DRD) 697 SrcBegin = SrcAddr.getPointer(); 698 llvm::Value *DestBegin = DestAddr.getPointer(); 699 // Cast from pointer to array type to pointer to single element. 700 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 701 // The basic structure here is a while-do loop. 702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 704 llvm::Value *IsEmpty = 705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 707 708 // Enter the loop body, making that address the current address. 
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 710 CGF.EmitBlock(BodyBB); 711 712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 713 714 llvm::PHINode *SrcElementPHI = nullptr; 715 Address SrcElementCurrent = Address::invalid(); 716 if (DRD) { 717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 718 "omp.arraycpy.srcElementPast"); 719 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 720 SrcElementCurrent = 721 Address(SrcElementPHI, 722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 723 } 724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 726 DestElementPHI->addIncoming(DestBegin, EntryBB); 727 Address DestElementCurrent = 728 Address(DestElementPHI, 729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 730 731 // Emit copy. 732 { 733 CodeGenFunction::RunCleanupsScope InitScope(CGF); 734 if (EmitDeclareReductionInit) { 735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 736 SrcElementCurrent, ElementTy); 737 } else 738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 739 /*IsInitializer=*/false); 740 } 741 742 if (DRD) { 743 // Shift the address forward by one element. 744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 745 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 746 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 747 } 748 749 // Shift the address forward by one element. 750 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 751 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 752 // Check whether we've reached the end. 753 llvm::Value *Done = 754 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 755 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 756 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 757 758 // Done. 
759 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 760 } 761 762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 763 return CGF.EmitOMPSharedLValue(E); 764 } 765 766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 767 const Expr *E) { 768 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 769 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 770 return LValue(); 771 } 772 773 void ReductionCodeGen::emitAggregateInitialization( 774 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 775 const OMPDeclareReductionDecl *DRD) { 776 // Emit VarDecl with copy init for arrays. 777 // Get the address of the original variable captured in current 778 // captured region. 779 const auto *PrivateVD = 780 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 781 bool EmitDeclareReductionInit = 782 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 783 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 784 EmitDeclareReductionInit, 785 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 786 : PrivateVD->getInit(), 787 DRD, SharedLVal.getAddress(CGF)); 788 } 789 790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 791 ArrayRef<const Expr *> Origs, 792 ArrayRef<const Expr *> Privates, 793 ArrayRef<const Expr *> ReductionOps) { 794 ClausesData.reserve(Shareds.size()); 795 SharedAddresses.reserve(Shareds.size()); 796 Sizes.reserve(Shareds.size()); 797 BaseDecls.reserve(Shareds.size()); 798 const auto *IOrig = Origs.begin(); 799 const auto *IPriv = Privates.begin(); 800 const auto *IRed = ReductionOps.begin(); 801 for (const Expr *Ref : Shareds) { 802 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 803 std::advance(IOrig, 1); 804 std::advance(IPriv, 1); 805 std::advance(IRed, 1); 806 } 807 } 808 809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 810 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 811 "Number of generated lvalues must be exactly N."); 812 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 813 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 814 SharedAddresses.emplace_back(First, Second); 815 if (ClausesData[N].Shared == ClausesData[N].Ref) { 816 OrigAddresses.emplace_back(First, Second); 817 } else { 818 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 819 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 820 OrigAddresses.emplace_back(First, Second); 821 } 822 } 823 824 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 825 const auto *PrivateVD = 826 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 827 QualType PrivateType = PrivateVD->getType(); 828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 829 if (!PrivateType->isVariablyModifiedType()) { 830 Sizes.emplace_back( 831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 832 nullptr); 833 return; 834 } 835 llvm::Value *Size; 836 llvm::Value 
      *SizeInChars;
  // NOTE(review): this relies on the pointer's pointee type via
  // getElementType(), i.e. on typed (pre-opaque) LLVM pointers.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole-object case: take the byte size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression so the
  // variably modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified type of reduction item \p N using an externally
/// computed element count \p Size. For non-VLA items \p Size must be null and
/// nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N at
/// \p PrivateAddr from the shared copy \p SharedLVal.
///
/// \param DefaultInit Callback performing any default initialization; its
/// return value indicates whether it already emitted the initialization, in
/// which case the private variable's own initializer is skipped.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their respective
  // types before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items go through the element-wise aggregate initializer.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own (non-trivial) initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item \p N has a type that
/// requires non-trivial destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of reduction item \p N at
/// \p PrivateAddr if its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind =
      PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Walk \p BaseLV through any chain of pointers/references until the pointee
/// type matches \p ElTy (loading through each level), then return an lvalue
/// for the result cast to the memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load through it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the pointer/reference indirection chain of \p BaseTy around the
/// raw pointer \p Addr: one temporary is created per indirection level, each
/// storing the address of the next, and the outermost temporary is returned.
/// If \p BaseTy has no indirections, \p Addr is returned directly with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// If \p Ref is an array section or array subscript, strip subscripts and
/// sections to find the underlying DeclRefExpr (returned via \p DE) and its
/// VarDecl. Returns null (leaving \p DE unset) for any other expression.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust \p PrivateAddr of reduction item \p N so it points at the same
/// offset within the private copy as the reduced section does within the
/// original variable, preserving the base variable's indirection structure.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the reduced section from the start of the original variable;
    // apply the same offset within the private copy.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Not an array section/subscript: the reference itself names the base
  // variable and no address adjustment is needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item \p N is initialized by a user-defined
/// (declare reduction) initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Load the thread-id lvalue through the outlined function's thread-id
/// pointer parameter (the variable has type int32*).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope enforces this: any exception escaping the structured
  // block terminates instead of unwinding out of it.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// For task regions the thread id is passed by value, so the variable is
/// addressed directly rather than loaded through a pointer.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed public field of type \p FieldTy and append it to \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // [8 x i32] — presumably mirrors the runtime's kmp_critical_name type;
  // confirm against kmp.h if changing.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  // Erase global variable declarations that ended up unreferenced (emitted
  // only for debug info); keep anything defined or still used.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime entity name: the first part is prefixed with
/// FirstSeparator, all subsequent parts with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the outlined combiner (or initializer, when \p IsCombiner is false)
/// function for a user-defined reduction, mapping \p In and \p Out onto the
/// function's two pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Combiners/initializers are tiny; force inlining at call sites when
    // optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer function: run omp_priv's own initializer first.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once per declaration) the combiner and optional initializer for the
/// user-defined reduction \p D, recording them in UDRMap. When \p CGF is
/// given, the mapping is also tied to the current function for cleanup.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Pass the initializer expression only for call-style initializers;
    // otherwise the priv variable's own initializer is used inside.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) pair for \p D, emitting it on demand
/// if it has not been emitted yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Not emitted yet: emitting inserts the entry looked up below.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the body of a 'parallel' or 'teams' construct (and their combined
/// forms) into a function taking the standard kmp_int32* thread-id parameter.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any directive form that can carry 'cancel' does so;
  // this controls finalization/cancellation handling below.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind
        InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks (set by the action
  // while outlining).
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Fill \p Fields with \p Data, matching each constant to its record field's
/// LLVM field index and zero-filling any padding slots in between.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is constructed but never used here.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized from \p Data
/// (one constant per field). Extra arguments are forwarded to
/// finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and append it to
/// the aggregate being built by \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Install a dummy instruction ("svcpt") to serve purely as an insertion
/// point for runtime service calls (e.g. thread-id loads) in the current
/// function, either at the current builder position or right after the
/// alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef acts as the marker; it is erased again in
  // clearLocThreadIdInsertPt().
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insertion-point marker for the current function, if set.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Render \p Loc as the ";file;function;line;column;;" ident string used by
/// the OpenMP runtime, written into \p Buffer.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

/// Build (or reuse) the ident_t* describing source location \p Loc with the
/// given ident \p Flags for runtime calls.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    // No usable location info: fall back to the default ";unknown;..." string.
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
        OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                        Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

/// Return the global thread id for the current function, preferring (in
/// order): the OpenMPIRBuilder when enabled, a value cached for this
/// function, the outlined region's thread-id parameter, and finally an
/// emitted __kmpc_global_thread_num call (which is then cached).
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only use the parameter when the load cannot end up dominated by a
      // possibly-throwing region: either EH is off, or the pointer lives in
      // the entry block / the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function caches (thread id, UDR/UDM entries, lastprivate
/// conditional and untied-task bookkeeping) once codegen for \p CGF's
/// function is finished.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

/// Return the (lazily created) pointer type of the kmpc_micro outlined
/// function entry.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// Return the __kmpc_for_static_init_{4,4u,8,8u} runtime entry matching the
/// loop induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// loop induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// loop induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Return the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// loop induction variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry without line directives (e.g. preprocessed input may name a file
    // that does not exist on disk); diagnose if that fails too.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// For a declare-target 'link' variable (or 'to' under unified shared
/// memory), return the address of the generated "_decl_tgt_ref_ptr" reference
/// pointer, creating it on first use; otherwise return an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables by their defining file.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1720 std::string Suffix = getName({"cache", ""}); 1721 return getOrCreateInternalVariable( 1722 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1723 } 1724 1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1726 const VarDecl *VD, 1727 Address VDAddr, 1728 SourceLocation Loc) { 1729 if (CGM.getLangOpts().OpenMPUseTLS && 1730 CGM.getContext().getTargetInfo().isTLSSupported()) 1731 return VDAddr; 1732 1733 llvm::Type *VarTy = VDAddr.getElementType(); 1734 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1735 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1736 CGM.Int8PtrTy), 1737 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1738 getOrCreateThreadPrivateCache(VD)}; 1739 return Address(CGF.EmitRuntimeCall( 1740 OMPBuilder.getOrCreateRuntimeFunction( 1741 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1742 Args), 1743 VDAddr.getAlignment()); 1744 } 1745 1746 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1747 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1748 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1749 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1750 // library. 1751 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1753 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1754 OMPLoc); 1755 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1756 // to register constructor/destructor for variable. 
1757 llvm::Value *Args[] = { 1758 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1759 Ctor, CopyCtor, Dtor}; 1760 CGF.EmitRuntimeCall( 1761 OMPBuilder.getOrCreateRuntimeFunction( 1762 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1763 Args); 1764 } 1765 1766 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1767 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1768 bool PerformInit, CodeGenFunction *CGF) { 1769 if (CGM.getLangOpts().OpenMPUseTLS && 1770 CGM.getContext().getTargetInfo().isTLSSupported()) 1771 return nullptr; 1772 1773 VD = VD->getDefinition(CGM.getContext()); 1774 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1775 QualType ASTTy = VD->getType(); 1776 1777 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1778 const Expr *Init = VD->getAnyInitializer(); 1779 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1780 // Generate function that re-emits the declaration's initializer into the 1781 // threadprivate copy of the variable VD 1782 CodeGenFunction CtorCGF(CGM); 1783 FunctionArgList Args; 1784 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1785 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1786 ImplicitParamDecl::Other); 1787 Args.push_back(&Dst); 1788 1789 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1790 CGM.getContext().VoidPtrTy, Args); 1791 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1792 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1793 llvm::Function *Fn = 1794 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1795 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1796 Args, Loc, Loc); 1797 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1798 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1799 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1800 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1801 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1802 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1803 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1804 /*IsInitializer=*/true); 1805 ArgVal = CtorCGF.EmitLoadOfScalar( 1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1807 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1808 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1809 CtorCGF.FinishFunction(); 1810 Ctor = Fn; 1811 } 1812 if (VD->getType().isDestructedType() != QualType::DK_none) { 1813 // Generate function that emits destructor call for the threadprivate copy 1814 // of the variable VD 1815 CodeGenFunction DtorCGF(CGM); 1816 FunctionArgList Args; 1817 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1818 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1819 ImplicitParamDecl::Other); 1820 Args.push_back(&Dst); 1821 1822 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1823 CGM.getContext().VoidTy, Args); 1824 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1825 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1826 llvm::Function *Fn = 1827 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1828 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1829 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1830 Loc, Loc); 1831 // Create a scope with an artificial location for the body of this function. 1832 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1833 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1834 DtorCGF.GetAddrOfLocalVar(&Dst), 1835 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1836 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1837 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1838 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1839 DtorCGF.FinishFunction(); 1840 Dtor = Fn; 1841 } 1842 // Do not emit init function if it is not required. 
1843 if (!Ctor && !Dtor) 1844 return nullptr; 1845 1846 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1847 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1848 /*isVarArg=*/false) 1849 ->getPointerTo(); 1850 // Copying constructor for the threadprivate variable. 1851 // Must be NULL - reserved by runtime, but currently it requires that this 1852 // parameter is always NULL. Otherwise it fires assertion. 1853 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1854 if (Ctor == nullptr) { 1855 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1856 /*isVarArg=*/false) 1857 ->getPointerTo(); 1858 Ctor = llvm::Constant::getNullValue(CtorTy); 1859 } 1860 if (Dtor == nullptr) { 1861 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Dtor = llvm::Constant::getNullValue(DtorTy); 1865 } 1866 if (!CGF) { 1867 auto *InitFunctionTy = 1868 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1869 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1870 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1871 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1872 CodeGenFunction InitCGF(CGM); 1873 FunctionArgList ArgList; 1874 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1875 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1876 Loc, Loc); 1877 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 InitCGF.FinishFunction(); 1879 return InitFunction; 1880 } 1881 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1882 } 1883 return nullptr; 1884 } 1885 1886 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1887 llvm::GlobalVariable *Addr, 1888 bool PerformInit) { 1889 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1890 !CGM.getLangOpts().OpenMPIsDevice) 1891 return false; 1892 
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the ref-pointer mechanism, not here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host the entry only needs a unique ID, not a real function.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of an "artificial" threadprivate variable identified by
/// \p Name (a compiler-generated temporary, not a user declaration). Uses a
/// TLS global when available, otherwise __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit an if/else for an OpenMP 'if' clause: \p ThenGen when \p Cond is
/// true, \p ElseGen otherwise. Constant conditions elide the dead arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit a call into the OpenMP runtime that starts a parallel region running
/// \p OutlinedFn. With an 'if' clause, the false branch runs the region
/// serialized on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Get (or create) a module-level internal global of type \p Ty named
/// \p Name, zero-initialized with common linkage. Asserts the type matches
/// on a cache hit.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Get the lock global (kmp_critical_name) used by __kmpc_critical for the
/// critical section named \p CriticalName.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits a runtime "enter" call before the region and an "exit" call after
/// it. With \p Conditional set, the region body is guarded on the enter
/// call's return value being nonzero; Done() must then be called to close
/// the guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // A missing 'filter' clause defaults to thread 0.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the helper function __kmpc_copyprivate passes to every waiting
/// thread: it copies each copyprivate variable from the source array
/// (RHSArg) into the destination array (LHSArg) via the provided
/// assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs here map onto the helper's
    // DestExprs/SrcExprs parameters in that order — verify the naming is
    // intentional before touching this call.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(),
                              OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag encoded into the
/// location argument of the barrier runtime calls.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for a loop directive. Doacross loops
/// (an 'ordered' clause with a loop count) force schedule(static, 1);
/// otherwise \p ScheduleKind and \p ChunkExpr are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier: either via the OpenMPIRBuilder (when enabled) or as a
/// direct call to __kmpc_barrier / __kmpc_cancel_barrier. In a cancellable
/// region (and unless \p ForceSimpleCall) the cancellable variant is used,
/// and when \p EmitChecks the nonzero result branches to the cancellation
/// exit of the enclosing construct.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ?
      OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// \return true iff \p ScheduleKind/\p Chunked lowers to plain
/// (non-ordered, non-chunked) static scheduling.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \return true iff the dist_schedule lowers to non-chunked static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// \return true iff \p ScheduleKind/\p Chunked lowers to chunked static
/// scheduling.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// \return true iff the dist_schedule lowers to chunked static.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// \return true iff the schedule kind requires the dynamic dispatch
/// runtime entry points (anything other than plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine the schedule enumeration with the monotonic/nonmonotonic/simd
/// modifier bits from the schedule clause and return the encoded value
/// passed to the runtime. \p M2 overrides \p M1 when both set a
/// monotonicity modifier; 'simd' upgrades static_chunked to
/// static_balanced_chunked.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  // Schedule and modifier occupy disjoint bit ranges of the encoded value.
  return Schedule | Modifier;
}

/// Emit a call to __kmpc_dispatch_init_* for a dynamically scheduled
/// (or ordered) worksharing loop. Static non-ordered schedules must not
/// reach here (see the assert below).
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper for emitForStaticInit/emitDistributeStaticInit: emit the
/// __kmpc_for_static_init_* call. Only static schedule kinds are valid here.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule ==
            OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit __kmpc_for_static_init_* for a statically scheduled worksharing
/// loop or sections directive; the ident_t flags mark the work kind
/// (loop vs. sections).
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit __kmpc_for_static_init_* for a 'distribute' directive; the
/// ident_t flags mark distribute work, and no schedule modifiers apply.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the matching __kmpc_for_static_fini call at the end of a
/// statically scheduled region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

/// Notify the runtime that one iteration of an ordered dynamic loop has
/// finished (__kmpc_dispatch_fini_*).
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Fetch the next chunk of a dynamically scheduled loop via
/// __kmpc_dispatch_next_*.
/// \return An i1 value: nonzero while there are more chunks to execute.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns kmp_int32; narrow it to a boolean.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit __kmpc_push_num_threads for a 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit __kmpc_push_proc_bind for a 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a 'flush' construct: via the OpenMPIRBuilder when enabled,
/// otherwise as a direct __kmpc_flush call. The flushed-variable list and
/// atomic ordering are currently unused by this lowering.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// \return true iff no target-region or device-global-var entries have
/// been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) keyed by
/// device/file/parent-function/line; only meaningful on the device side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register a target region entry: on the device side fill in the address,
/// ID and flags of the (possibly just-initialized) entry; on the host side
/// create a fresh entry, asserting it was not registered before.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                      OffloadingEntriesNum);
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: tolerate a duplicate plain target-region registration.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// \return true iff a target region entry exists for the given
/// device/file/parent/line key. Unless \p IgnoreAddressId, an entry whose
/// address or ID is already set is treated as absent (it is fully
/// registered, not merely initialized).
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
    bool IgnoreAddressId) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (!IgnoreAddressId &&
      (PerLine->second.getAddress() || PerLine->second.getID()))
    return false;
  return true;
}

/// Invoke \p Action on every recorded target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Create a placeholder device-global-variable entry keyed by mangled
/// name; only meaningful on the device side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register a device global variable entry, filling in address, size and
/// linkage. An existing entry with an address keeps its address and only
/// acquires a size/linkage if it had none.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emit one __tgt_offload_entry descriptor (in the "omp_offloading_entries"
/// section) so the linker/plugin can enumerate this offload entity.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
3120 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3121 3122 std::string StringName = getName({"omp_offloading", "entry_name"}); 3123 auto *Str = new llvm::GlobalVariable( 3124 M, StrPtrInit->getType(), /*isConstant=*/true, 3125 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3126 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3127 3128 llvm::Constant *Data[] = { 3129 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3130 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3131 llvm::ConstantInt::get(CGM.SizeTy, Size), 3132 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3133 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3134 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3135 llvm::GlobalVariable *Entry = createGlobalStruct( 3136 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3137 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3138 3139 // The entry has to be created in the section the linker expects it to be. 3140 Entry->setSection("omp_offloading_entries"); 3141 } 3142 3143 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3144 // Emit the offloading entries and metadata so that the device codegen side 3145 // can easily figure out what to emit. The produced metadata looks like 3146 // this: 3147 // 3148 // !omp_offload.info = !{!1, ...} 3149 // 3150 // Right now we only generate metadata for function that contain target 3151 // regions. 3152 3153 // If we are in simd mode or there are no entries, we don't need to do 3154 // anything. 
3155 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3156 return; 3157 3158 llvm::Module &M = CGM.getModule(); 3159 llvm::LLVMContext &C = M.getContext(); 3160 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3161 SourceLocation, StringRef>, 3162 16> 3163 OrderedEntries(OffloadEntriesInfoManager.size()); 3164 llvm::SmallVector<StringRef, 16> ParentFunctions( 3165 OffloadEntriesInfoManager.size()); 3166 3167 // Auxiliary methods to create metadata values and strings. 3168 auto &&GetMDInt = [this](unsigned V) { 3169 return llvm::ConstantAsMetadata::get( 3170 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3171 }; 3172 3173 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3174 3175 // Create the offloading info metadata node. 3176 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3177 3178 // Create function that emits metadata for each target region entry; 3179 auto &&TargetRegionMetadataEmitter = 3180 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3181 &GetMDString]( 3182 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3183 unsigned Line, 3184 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3185 // Generate metadata for target regions. Each entry of this metadata 3186 // contains: 3187 // - Entry 0 -> Kind of this type of metadata (0). 3188 // - Entry 1 -> Device ID of the file where the entry was identified. 3189 // - Entry 2 -> File ID of the file where the entry was identified. 3190 // - Entry 3 -> Mangled name of the function where the entry was 3191 // identified. 3192 // - Entry 4 -> Line in the file where the entry was identified. 3193 // - Entry 5 -> Order the entry was created. 3194 // The first element of the metadata node is the kind. 
3195 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3196 GetMDInt(FileID), GetMDString(ParentName), 3197 GetMDInt(Line), GetMDInt(E.getOrder())}; 3198 3199 SourceLocation Loc; 3200 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3201 E = CGM.getContext().getSourceManager().fileinfo_end(); 3202 I != E; ++I) { 3203 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3204 I->getFirst()->getUniqueID().getFile() == FileID) { 3205 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3206 I->getFirst(), Line, 1); 3207 break; 3208 } 3209 } 3210 // Save this entry in the right position of the ordered entries array. 3211 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3212 ParentFunctions[E.getOrder()] = ParentName; 3213 3214 // Add metadata to the named metadata node. 3215 MD->addOperand(llvm::MDNode::get(C, Ops)); 3216 }; 3217 3218 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3219 TargetRegionMetadataEmitter); 3220 3221 // Create function that emits metadata for each device global variable entry; 3222 auto &&DeviceGlobalVarMetadataEmitter = 3223 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3224 MD](StringRef MangledName, 3225 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3226 &E) { 3227 // Generate metadata for global variables. Each entry of this metadata 3228 // contains: 3229 // - Entry 0 -> Kind of this type of metadata (1). 3230 // - Entry 1 -> Mangled name of the variable. 3231 // - Entry 2 -> Declare target kind. 3232 // - Entry 3 -> Order the entry was created. 3233 // The first element of the metadata node is the kind. 3234 llvm::Metadata *Ops[] = { 3235 GetMDInt(E.getKind()), GetMDString(MangledName), 3236 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3237 3238 // Save this entry in the right position of the ordered entries array. 
3239 OrderedEntries[E.getOrder()] = 3240 std::make_tuple(&E, SourceLocation(), MangledName); 3241 3242 // Add metadata to the named metadata node. 3243 MD->addOperand(llvm::MDNode::get(C, Ops)); 3244 }; 3245 3246 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3247 DeviceGlobalVarMetadataEmitter); 3248 3249 for (const auto &E : OrderedEntries) { 3250 assert(std::get<0>(E) && "All ordered entries must exist!"); 3251 if (const auto *CE = 3252 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3253 std::get<0>(E))) { 3254 if (!CE->getID() || !CE->getAddress()) { 3255 // Do not blame the entry if the parent funtion is not emitted. 3256 StringRef FnName = ParentFunctions[CE->getOrder()]; 3257 if (!CGM.GetGlobalValue(FnName)) 3258 continue; 3259 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3260 DiagnosticsEngine::Error, 3261 "Offloading entry for target region in %0 is incorrect: either the " 3262 "address or the ID is invalid."); 3263 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3264 continue; 3265 } 3266 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3267 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3268 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3269 OffloadEntryInfoDeviceGlobalVar>( 3270 std::get<0>(E))) { 3271 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3272 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3273 CE->getFlags()); 3274 switch (Flags) { 3275 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3276 if (CGM.getLangOpts().OpenMPIsDevice && 3277 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3278 continue; 3279 if (!CE->getAddress()) { 3280 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3281 DiagnosticsEngine::Error, "Offloading entry for declare target " 3282 "variable %0 is incorrect: the " 3283 "address is invalid."); 3284 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3285 
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // On the device a 'link' variable is only referenced indirectly (its
        // address comes from the host), so exactly one of the two sides may
        // carry an address.
        // NOTE(review): typo "Declaret" in the assert message below — worth
        // fixing upstream (string literal, left untouched here).
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For a global variable the entry's address doubles as its unique ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // consumed, the module itself is discarded at end of scope.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read one operand of the metadata tuple as an integer or a
    // string; operand 0 encodes the entry kind.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds the kmp_routine_entry_t type:
///   typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
/// Caches both the QualType and the converted llvm type in the runtime.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Packed so the layout matches the runtime's __tgt_offload_entry exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Helper bundle describing one privatized variable of a task:
/// the original captured reference/decl, the generated private copy, and the
/// per-element initializer variable (for array firstprivate init).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  // A "local private" was built with the single-arg constructor: it has only
  // the original decl and no captured reference or generated copy.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Returns true if \p VD carries an omp allocate attribute that requests a
/// non-default (user-specified) allocator.
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

/// Builds the implicit record holding all privatized task variables, or
/// returns nullptr when there is nothing to privatize.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate alignment attributes so the field keeps the variable's
      // required alignment inside the record.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Builds the kmp_task_t record matching the OpenMP runtime's task descriptor.
/// The field order must stay in sync with the KmpTaskT* field indices
/// (KmpTaskTShareds, KmpTaskTPartId, KmpTaskTLowerBound, ...) used by the
/// emitters below — TODO(review) confirm against the enum declaration.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  // Taskloop descriptors additionally carry bounds, stride, last-iteration
  // flag and a reductions pointer.
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Wraps kmp_task_t together with the privates record; the privates record is
/// always field index 1 when present (the proxy/destructor emitters rely on
/// that position).
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature dictated by the runtime: (kmp_int32 gtid, kmp_task_t *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of kmp_task_t_with_privates is the embedded kmp_task_t.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address; the outlined function may update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, if present, is field index 1; otherwise pass null.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally forward lb/ub/st/liter/reductions from kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits the task destructor: iterates the privates record and pushes a
/// destroy cleanup for every field whose type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field that needs destruction;
  // the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its (1-based) parameter position so the
  // store loop below can find the out-parameter for each record field.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Local privates use the same pointer adjustments as
  // createPrivatesRecordDecl so parameter and field types line up.
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // This trivial mapper should always be inlined when optimizing.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    // Store the address of the record field through the out-parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the source task
///        (may be invalid when there is nothing to copy from).
/// \param ForDup true when emitting inside the task duplication function
///        (taskloop), false for first-time task construction.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor inits must be re-run;
    // trivial data was already copied when the source task was created.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function read the shared value out of the source
          // task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind Elem to the current source element so the init
                  // expression reads from it.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops, i.e. whether any
/// private copy has a non-trivial constructor initializer to re-run.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates carry no initializer to re-run.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature dictated by the runtime:
  // (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task's shareds block.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
4062 class OMPIteratorGeneratorScope final 4063 : public CodeGenFunction::OMPPrivateScope { 4064 CodeGenFunction &CGF; 4065 const OMPIteratorExpr *E = nullptr; 4066 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4067 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4068 OMPIteratorGeneratorScope() = delete; 4069 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4070 4071 public: 4072 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4073 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4074 if (!E) 4075 return; 4076 SmallVector<llvm::Value *, 4> Uppers; 4077 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4078 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4079 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4080 addPrivate(VD, [&CGF, VD]() { 4081 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4082 }); 4083 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4084 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4085 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4086 "counter.addr"); 4087 }); 4088 } 4089 Privatize(); 4090 4091 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4092 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4093 LValue CLVal = 4094 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4095 HelperData.CounterVD->getType()); 4096 // Counter = 0; 4097 CGF.EmitStoreOfScalar( 4098 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4099 CLVal); 4100 CodeGenFunction::JumpDest &ContDest = 4101 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4102 CodeGenFunction::JumpDest &ExitDest = 4103 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4104 // N = <number-of_iterations>; 4105 llvm::Value *N = Uppers[I]; 4106 // cont: 4107 // if (Counter < N) goto body; else goto exit; 4108 CGF.EmitBlock(ContDest.getBlock()); 4109 auto *CVal = 4110 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4111 llvm::Value *Cmp = 4112 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4113 ? CGF.Builder.CreateICmpSLT(CVal, N) 4114 : CGF.Builder.CreateICmpULT(CVal, N); 4115 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4116 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4117 // body: 4118 CGF.EmitBlock(BodyBB); 4119 // Iteri = Begini + Counter * Stepi; 4120 CGF.EmitIgnoredExpr(HelperData.Update); 4121 } 4122 } 4123 ~OMPIteratorGeneratorScope() { 4124 if (!E) 4125 return; 4126 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4127 // Counter = Counter + 1; 4128 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4129 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4130 // goto cont; 4131 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4132 // exit: 4133 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4134 } 4135 } 4136 }; 4137 } // namespace 4138 4139 static std::pair<llvm::Value *, llvm::Value *> 4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4141 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4142 llvm::Value *Addr; 4143 if (OASE) { 4144 const Expr *Base = OASE->getBase(); 4145 Addr = CGF.EmitScalarExpr(Base); 4146 } else { 4147 Addr = CGF.EmitLValue(E).getPointer(CGF); 4148 } 4149 llvm::Value *SizeVal; 4150 QualType Ty = E->getType(); 4151 if (OASE) { 4152 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4153 for (const Expr *SE : OASE->getDimensions()) { 4154 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4155 Sz = CGF.EmitScalarConversion( 4156 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4157 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4158 } 4159 } else if (const auto *ASE = 4160 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4161 LValue UpAddrLVal = 4162 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4163 llvm::Value *UpAddr = 
4164 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4165 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4166 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4167 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4168 } else { 4169 SizeVal = CGF.getTypeSize(Ty); 4170 } 4171 return std::make_pair(Addr, SizeVal); 4172 } 4173 4174 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4175 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4176 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4177 if (KmpTaskAffinityInfoTy.isNull()) { 4178 RecordDecl *KmpAffinityInfoRD = 4179 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4180 KmpAffinityInfoRD->startDefinition(); 4181 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4182 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4183 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4184 KmpAffinityInfoRD->completeDefinition(); 4185 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4186 } 4187 } 4188 4189 CGOpenMPRuntime::TaskResultTy 4190 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4191 const OMPExecutableDirective &D, 4192 llvm::Function *TaskFunction, QualType SharedsTy, 4193 Address Shareds, const OMPTaskDataTy &Data) { 4194 ASTContext &C = CGM.getContext(); 4195 llvm::SmallVector<PrivateDataTy, 4> Privates; 4196 // Aggregate privates and sort them by the alignment. 
  // Collect private, firstprivate and lastprivate copies; each entry pairs
  // the declared alignment with the helper data used later to lay out and
  // initialize the privates record.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored as pointers into allocator memory.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment to minimize padding in the privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended kmp_task_t layout, cached separately from the plain-task one.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates-map function matches the type of the 4th parameter of the
  // outlined task function; pass null when there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' is either a compile-time constant (getInt) or a runtime condition
  // (getPointer) that selects the flag via a select instruction.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator-modified clauses contribute a runtime count: the product
        // of the upper bounds of all iterators in the modifier.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = NumAffinities + <iterator products>, so
      // emit a VLA of kmp_task_affinity_info_t.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically-sized case: constant array of NumAffinities entries.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-modified clauses index the array through a counter kept in
      // memory, starting right after the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope wraps the stores below in the iterator's loop nest.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops with lastprivates or constructor-initialized privates also
    // need a task-duplication callback (see emitTaskDupFunction).
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  // Flags are stored with the width of 'bool', as an unsigned integer.
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    // struct kmp_depend_info { intptr_t base_addr; size_t len; flags; };
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependence list.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one kmp_depend_info element; the element count is stored there.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Fills one kmp_depend_info record per dependence expression in \p Data,
/// starting at \p Pos inside \p DependenciesArray. \p Pos is either a
/// compile-time index (unsigned*) or a runtime counter in memory (LValue*),
/// and is advanced past the emitted records in either case.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, wrap the stores in its loop nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

/// For each depobj expression in \p Data, reads the number of
/// kmp_depend_info elements stored in that depobj. Returns one size value
/// per expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count is kept in the base_addr field of the record just
      // before the first dependence element (GEP by -1).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a zero-initialized stack temporary; it is
      // read back below, after the iterator scope has been closed.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the dependence records stored in each depobj in \p Data into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of copied records.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit when every clause has an empty expression list.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable deps: everything except depobj kinds and
  // iterator-modified clauses (both are counted at runtime below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ?
0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // The total size is only known at runtime - emit a variable-length array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Bind the runtime element count as the VLA size expression.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Statically-known size - use a constant array on the stack.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Emit regular dependencies without iterators first; Pos tracks the write
  // position in the array.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators (position becomes a runtime
  // counter from here on).
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the dynamically allocated dependency array for a standalone
/// 'omp depobj' directive and returns its address, pointing past the leading
/// bookkeeping element that stores the element count.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Element count is only known at runtime: the product of the iteration
    // counts of all iterators.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the bookkeeping element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically-known count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position starts at 1, past the bookkeeping element; iterator-produced
  // deps need a runtime counter instead of a compile-time index.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address past the bookkeeping element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Emits code to free a depobj's dependency array for the 'destroy' clause of
/// the 'omp depobj' directive.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // The allocation actually starts at the bookkeeping element at index -1
  // (see emitDepobjDependClause), so step back before freeing.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

/// Emits code to rewrite the dependency kind of every element of a depobj's
/// dependency array, for the 'update' clause of the 'omp depobj' directive.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; updated at the end of each
  // iteration.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object and its data.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if
  // dependence list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Codegen for the 'if' clause evaluating to true (or no 'if' clause):
  // enqueue the task through the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Reset part_id for untied tasks so execution restarts from the top
      // when the task is resumed.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Codegen for the 'if' clause evaluating to false: execute the task
  // immediately (undeferred), bracketed by begin_if0/complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by
  // __kmpc_omp_task_alloc() libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // descriptor from the corresponding directive helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source and destination elements across loop
  // iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar so the generated combiner operates on the
  // current elements rather than on the whole arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A user-defined reduction appears as a call through an OpaqueValueExpr
  // referring to the OMPDeclareReductionDecl; bind the opaque callee to the
  // actual combiner function before emitting the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to the corresponding slot of the packed
  // reduction list.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size occupies the next slot of the reduction list.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction item: an element-wise aggregate
/// reduction for array sections, a plain combiner otherwise.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime handshake needed: emit the combiners inline, one per item.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the extra slot reserved after the element
      // pointer, smuggled through the void* array as an inttoptr value.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: run all combiners directly; the end-reduce call is
  // attached as the exit action of the region.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Atomic path: decompose each combiner of the shape 'x = <update expr>' so
  // it can be emitted as a simple atomic update; anything else falls back to
  // a critical region (see below).
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Evaluate the update expression with the LHS variable
                // privatized to a temporary holding the current atomic value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
CGM.getTypes().GetFunctionType(FnInfo); 6023 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6024 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6025 Name, &CGM.getModule()); 6026 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6027 Fn->setDoesNotRecurse(); 6028 CodeGenFunction CGF(CGM); 6029 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6030 Address PrivateAddr = CGF.EmitLoadOfPointer( 6031 CGF.GetAddrOfLocalVar(&Param), 6032 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6033 llvm::Value *Size = nullptr; 6034 // If the size of the reduction item is non-constant, load it from global 6035 // threadprivate variable. 6036 if (RCG.getSizes(N).second) { 6037 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6038 CGF, CGM.getContext().getSizeType(), 6039 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6040 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6041 CGM.getContext().getSizeType(), Loc); 6042 } 6043 RCG.emitAggregateType(CGF, N, Size); 6044 // Emit the finalizer body: 6045 // <destroy>(<type>* %0) 6046 RCG.emitCleanups(CGF, N, PrivateAddr); 6047 CGF.FinishFunction(Loc); 6048 return Fn; 6049 } 6050 6051 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6052 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6053 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6054 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6055 return nullptr; 6056 6057 // Build typedef struct: 6058 // kmp_taskred_input { 6059 // void *reduce_shar; // shared reduction item 6060 // void *reduce_orig; // original reduction item used for initialization 6061 // size_t reduce_size; // size of data item 6062 // void *reduce_init; // data initialization routine 6063 // void *reduce_fini; // data finalization routine 6064 // void *reduce_comb; // data combiner routine 6065 // kmp_task_red_flags_t flags; // 
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy allocation by the runtime (VLA/array
      // section whose size is only known via the threadprivate size var).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6205 if (Sizes.second) { 6206 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6207 /*isSigned=*/false); 6208 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6209 CGF, CGM.getContext().getSizeType(), 6210 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6211 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6212 } 6213 } 6214 6215 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6216 SourceLocation Loc, 6217 llvm::Value *ReductionsPtr, 6218 LValue SharedLVal) { 6219 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6220 // *d); 6221 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6222 CGM.IntTy, 6223 /*isSigned=*/true), 6224 ReductionsPtr, 6225 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6226 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6227 return Address( 6228 CGF.EmitRuntimeCall( 6229 OMPBuilder.getOrCreateRuntimeFunction( 6230 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6231 Args), 6232 SharedLVal.getAlignment()); 6233 } 6234 6235 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6236 SourceLocation Loc) { 6237 if (!CGF.HaveInsertPoint()) 6238 return; 6239 6240 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6241 OMPBuilder.createTaskwait(CGF.Builder); 6242 } else { 6243 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6244 // global_tid); 6245 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6246 // Ignore return result until untied tasks are supported. 
6247 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6248 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6249 Args); 6250 } 6251 6252 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6253 Region->emitUntiedSwitch(CGF); 6254 } 6255 6256 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6257 OpenMPDirectiveKind InnerKind, 6258 const RegionCodeGenTy &CodeGen, 6259 bool HasCancel) { 6260 if (!CGF.HaveInsertPoint()) 6261 return; 6262 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6263 InnerKind != OMPD_critical && 6264 InnerKind != OMPD_master && 6265 InnerKind != OMPD_masked); 6266 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6267 } 6268 6269 namespace { 6270 enum RTCancelKind { 6271 CancelNoreq = 0, 6272 CancelParallel = 1, 6273 CancelLoop = 2, 6274 CancelSections = 3, 6275 CancelTaskgroup = 4 6276 }; 6277 } // anonymous namespace 6278 6279 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6280 RTCancelKind CancelKind = CancelNoreq; 6281 if (CancelRegion == OMPD_parallel) 6282 CancelKind = CancelParallel; 6283 else if (CancelRegion == OMPD_for) 6284 CancelKind = CancelLoop; 6285 else if (CancelRegion == OMPD_sections) 6286 CancelKind = CancelSections; 6287 else { 6288 assert(CancelRegion == OMPD_taskgroup); 6289 CancelKind = CancelTaskgroup; 6290 } 6291 return CancelKind; 6292 } 6293 6294 void CGOpenMPRuntime::emitCancellationPointCall( 6295 CodeGenFunction &CGF, SourceLocation Loc, 6296 OpenMPDirectiveKind CancelRegion) { 6297 if (!CGF.HaveInsertPoint()) 6298 return; 6299 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6300 // global_tid, kmp_int32 cncl_kind); 6301 if (auto *OMPRegionInfo = 6302 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6303 // For 'cancellation point taskgroup', the task region info may not have a 6304 // cancel. This may instead happen in another adjacent task. 
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Cancel only when the 'if' clause condition evaluates to true.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
/// Pre/post action that initializes every allocator from a 'uses_allocators'
/// clause on entry to the target region and destroys it on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

/// Collect uses_allocators data from \p D, attach the init/fini action to
/// \p CodeGen, and delegate outlining to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE: this local 'D' shadows the directive parameter 'D' above.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Only allocators that carry a traits expression need runtime init.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Emit a call to __kmpc_init_allocator for one (allocator, traits) pair and
/// store the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the extent of the traits array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Materialize the allocator variable itself, then store the runtime handle
  // into it (converted from void* to the allocator's declared type).
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit a call to __kmpc_destroy_allocator for the allocator handle currently
/// stored in the 'uses_allocators' variable \p Allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the stored handle back to void* for the runtime call.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the target region body with a fresh CodeGenFunction using the
  // target-region captured-statement info.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (externally visible) function address.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a unique dummy global serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Drill through compound statements (skipping trivial/ignorable statements)
/// to find the single "meaningful" child of \p Body, or null if there are
/// several.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      // A DeclStmt is ignorable when every declaration in it is "trivial":
      // type-like/pragma/OpenMP metadata decls, or variables of trivial or
      // reference type whose initializer (if any) is itself trivial.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep unwrapping containers until the child is no longer a compound.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the single nested directive (if any) to decide.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression in the captured-region
          // context.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause sits on \p D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads implied by the single directive nested in
/// \p CS, clamped by \p DefaultThreadLimitVal when that is non-null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false 'if' => serialized parallel => one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the default thread limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend into a nested teams (non-distribute) region to find the
      // directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Final limit is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  /// Counts the trailing zero bits of OMP_MAP_MEMBER_OF, i.e. the bit position
  /// of the lowest set bit of the MEMBER_OF mask.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the raw base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  // Parallel arrays: entry I of each array describes the same mapping.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
7178 void append(MapCombinedInfoTy &CurInfo) { 7179 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); 7180 BasePointers.append(CurInfo.BasePointers.begin(), 7181 CurInfo.BasePointers.end()); 7182 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7183 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7184 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7185 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7186 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), 7187 CurInfo.NonContigInfo.Dims.end()); 7188 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), 7189 CurInfo.NonContigInfo.Offsets.end()); 7190 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), 7191 CurInfo.NonContigInfo.Counts.end()); 7192 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), 7193 CurInfo.NonContigInfo.Strides.end()); 7194 } 7195 }; 7196 7197 /// Map between a struct and the its lowest & highest elements which have been 7198 /// mapped. 7199 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7200 /// HE(FieldIndex, Pointer)} 7201 struct StructRangeInfoTy { 7202 MapCombinedInfoTy PreliminaryMapData; 7203 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7204 0, Address::invalid()}; 7205 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7206 0, Address::invalid()}; 7207 Address Base = Address::invalid(); 7208 Address LB = Address::invalid(); 7209 bool IsArraySection = false; 7210 bool HasCompleteRecord = false; 7211 }; 7212 7213 private: 7214 /// Kind that defines how a device pointer has to be returned. 
  struct MapInfo {
    /// The expression component list describing the mapped item.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete) for this item.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (always/close/present/...) from the map clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers (e.g. present) from a to/from clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the device pointer must be returned (use_device_ptr/addr).
    bool ReturnDevicePointer = false;
    /// True if this mapping was generated implicitly rather than written.
    bool IsImplicit = false;
    /// User-defined mapper associated with this item, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original map-clause expression, used for diagnostics/debug info.
    const Expr *VarRef = nullptr;
    /// True when the entry comes from a use_device_addr clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The deferred expression.
    const Expr *IE = nullptr;
    /// The declaration the entry refers to.
    const ValueDecl *VD = nullptr;
    /// True when the entry comes from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Emit an llvm::Value holding the size in bytes of the data designated by
  /// \a E, handling array shaping expressions, references and array sections
  /// in addition to ordinary typed expressions.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = pointee size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Each extent is widened/converted to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp the result to zero with a select so an out-of-range lower bound
      // cannot produce a wrapped-around (huge) unsigned size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier.
Add 7351 /// a flag marking the map as a pointer if requested. Add a flag marking the 7352 /// map as the first one of a series of maps that relate to the same map 7353 /// expression. 7354 OpenMPOffloadMappingFlags getMapTypeBits( 7355 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7356 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7357 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7358 OpenMPOffloadMappingFlags Bits = 7359 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7360 switch (MapType) { 7361 case OMPC_MAP_alloc: 7362 case OMPC_MAP_release: 7363 // alloc and release is the default behavior in the runtime library, i.e. 7364 // if we don't pass any bits alloc/release that is what the runtime is 7365 // going to do. Therefore, we don't need to signal anything for these two 7366 // type modifiers. 7367 break; 7368 case OMPC_MAP_to: 7369 Bits |= OMP_MAP_TO; 7370 break; 7371 case OMPC_MAP_from: 7372 Bits |= OMP_MAP_FROM; 7373 break; 7374 case OMPC_MAP_tofrom: 7375 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7376 break; 7377 case OMPC_MAP_delete: 7378 Bits |= OMP_MAP_DELETE; 7379 break; 7380 case OMPC_MAP_unknown: 7381 llvm_unreachable("Unexpected map type!"); 7382 } 7383 if (AddPtrFlag) 7384 Bits |= OMP_MAP_PTR_AND_OBJ; 7385 if (AddIsTargetParamFlag) 7386 Bits |= OMP_MAP_TARGET_PARAM; 7387 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7388 != MapModifiers.end()) 7389 Bits |= OMP_MAP_ALWAYS; 7390 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7391 != MapModifiers.end()) 7392 Bits |= OMP_MAP_CLOSE; 7393 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) != 7394 MapModifiers.end() || 7395 llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) != 7396 MotionModifiers.end()) 7397 Bits |= OMP_MAP_PRESENT; 7398 if (IsNonContiguous) 7399 Bits |= OMP_MAP_NON_CONTIG; 7400 return Bits; 7401 } 7402 7403 /// Return true if the provided expression is a final array section. 
A 7404 /// final array section, is one whose length can't be proved to be one. 7405 bool isFinalArraySectionExpression(const Expr *E) const { 7406 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7407 7408 // It is not an array section and therefore not a unity-size one. 7409 if (!OASE) 7410 return false; 7411 7412 // An array section with no colon always refer to a single element. 7413 if (OASE->getColonLocFirst().isInvalid()) 7414 return false; 7415 7416 const Expr *Length = OASE->getLength(); 7417 7418 // If we don't have a length we have to check if the array has size 1 7419 // for this dimension. Also, we should always expect a length if the 7420 // base type is pointer. 7421 if (!Length) { 7422 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7423 OASE->getBase()->IgnoreParenImpCasts()) 7424 .getCanonicalType(); 7425 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7426 return ATy->getSize().getSExtValue() != 1; 7427 // If we don't have a constant dimension length, we have to consider 7428 // the current section as having any size, so it is not necessarily 7429 // unitary. If it happen to be unity size, that's user fault. 7430 return true; 7431 } 7432 7433 // Check if the length evaluates to 1. 7434 Expr::EvalResult Result; 7435 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7436 return true; // Can have more that size 1. 7437 7438 llvm::APSInt ConstLength = Result.Val.getInt(); 7439 return ConstLength.getSExtValue() != 1; 7440 } 7441 7442 /// Generate the base pointers, section pointers, sizes, map type bits, and 7443 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7444 /// map type, map or motion modifiers, and expression components. 7445 /// \a IsFirstComponent should be set to true if the provided set of 7446 /// components is the first associated with a capture. 
7447 void generateInfoForComponentList( 7448 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7449 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7450 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7451 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7452 bool IsFirstComponentList, bool IsImplicit, 7453 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7454 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7455 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7456 OverlappedElements = llvm::None) const { 7457 // The following summarizes what has to be generated for each map and the 7458 // types below. The generated information is expressed in this order: 7459 // base pointer, section pointer, size, flags 7460 // (to add to the ones that come from the map type and modifier). 7461 // 7462 // double d; 7463 // int i[100]; 7464 // float *p; 7465 // 7466 // struct S1 { 7467 // int i; 7468 // float f[50]; 7469 // } 7470 // struct S2 { 7471 // int i; 7472 // float f[50]; 7473 // S1 s; 7474 // double *p; 7475 // struct S2 *ps; 7476 // int &ref; 7477 // } 7478 // S2 s; 7479 // S2 *ps; 7480 // 7481 // map(d) 7482 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7483 // 7484 // map(i) 7485 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7486 // 7487 // map(i[1:23]) 7488 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7489 // 7490 // map(p) 7491 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7492 // 7493 // map(p[1:24]) 7494 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7495 // in unified shared memory mode or for local pointers 7496 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7497 // 7498 // map(s) 7499 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7500 // 7501 // map(s.i) 7502 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7503 // 7504 // map(s.s.f) 7505 // &s, &(s.s.f[0]), 
50*sizeof(float), TARGET_PARAM | TO | FROM 7506 // 7507 // map(s.p) 7508 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7509 // 7510 // map(to: s.p[:22]) 7511 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7512 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7513 // &(s.p), &(s.p[0]), 22*sizeof(double), 7514 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7515 // (*) alloc space for struct members, only this is a target parameter 7516 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7517 // optimizes this entry out, same in the examples below) 7518 // (***) map the pointee (map: to) 7519 // 7520 // map(to: s.ref) 7521 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 7522 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7523 // (*) alloc space for struct members, only this is a target parameter 7524 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7525 // optimizes this entry out, same in the examples below) 7526 // (***) map the pointee (map: to) 7527 // 7528 // map(s.ps) 7529 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7530 // 7531 // map(from: s.ps->s.i) 7532 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7533 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7534 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7535 // 7536 // map(to: s.ps->ps) 7537 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7538 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7539 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7540 // 7541 // map(s.ps->ps->ps) 7542 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7543 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7544 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7545 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7546 // 7547 // map(to: s.ps->ps->s.f[:22]) 7548 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7549 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7550 // &(s.ps), &(s.ps->ps), sizeof(S2*), 
MEMBER_OF(1) | PTR_AND_OBJ 7551 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7552 // 7553 // map(ps) 7554 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7555 // 7556 // map(ps->i) 7557 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7558 // 7559 // map(ps->s.f) 7560 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7561 // 7562 // map(from: ps->p) 7563 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7564 // 7565 // map(to: ps->p[:22]) 7566 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7567 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7568 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7569 // 7570 // map(ps->ps) 7571 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7572 // 7573 // map(from: ps->ps->s.i) 7574 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7575 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7576 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7577 // 7578 // map(from: ps->ps->ps) 7579 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7580 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7581 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7582 // 7583 // map(ps->ps->ps->ps) 7584 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7585 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7586 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7587 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7588 // 7589 // map(to: ps->ps->ps->s.f[:22]) 7590 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7591 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7592 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7593 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7594 // 7595 // map(to: s.f[:22]) map(from: s.p[:33]) 7596 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7597 // sizeof(double*) (**), TARGET_PARAM 7598 // &s, &(s.f[0]), 22*sizeof(float), 
MEMBER_OF(1) | TO 7599 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7600 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7601 // (*) allocate contiguous space needed to fit all mapped members even if 7602 // we allocate space for members not mapped (in this example, 7603 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7604 // them as well because they fall between &s.f[0] and &s.p) 7605 // 7606 // map(from: s.f[:22]) map(to: ps->p[:33]) 7607 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7608 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7609 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7610 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7611 // (*) the struct this entry pertains to is the 2nd element in the list of 7612 // arguments, hence MEMBER_OF(2) 7613 // 7614 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7615 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7616 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7617 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7618 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7619 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7620 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7621 // (*) the struct this entry pertains to is the 4th element in the list 7622 // of arguments, hence MEMBER_OF(4) 7623 7624 // Track if the map information being generated is the first for a capture. 7625 bool IsCaptureFirstInfo = IsFirstComponentList; 7626 // When the variable is on a declare target link or in a to clause with 7627 // unified memory, a reference is needed to hold the host/device address 7628 // of the variable. 7629 bool RequiresReference = false; 7630 7631 // Scan the components from the base to the complete expression. 
7632 auto CI = Components.rbegin(); 7633 auto CE = Components.rend(); 7634 auto I = CI; 7635 7636 // Track if the map information being generated is the first for a list of 7637 // components. 7638 bool IsExpressionFirstInfo = true; 7639 bool FirstPointerInComplexData = false; 7640 Address BP = Address::invalid(); 7641 const Expr *AssocExpr = I->getAssociatedExpression(); 7642 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7643 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7644 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7645 7646 if (isa<MemberExpr>(AssocExpr)) { 7647 // The base is the 'this' pointer. The content of the pointer is going 7648 // to be the base of the field being mapped. 7649 BP = CGF.LoadCXXThisAddress(); 7650 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7651 (OASE && 7652 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7653 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7654 } else if (OAShE && 7655 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7656 BP = Address( 7657 CGF.EmitScalarExpr(OAShE->getBase()), 7658 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7659 } else { 7660 // The base is the reference to the variable. 7661 // BP = &Var. 7662 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7663 if (const auto *VD = 7664 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7665 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7666 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7667 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7668 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7669 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7670 RequiresReference = true; 7671 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7672 } 7673 } 7674 } 7675 7676 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7677 // the last component), the base has to be the pointer itself, not its 7678 // reference. References are ignored for mapping purposes. 7679 QualType Ty = 7680 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7681 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7682 // No need to generate individual map information for the pointer, it 7683 // can be associated with the combined storage if shared memory mode is 7684 // active or the base declaration is not global variable. 7685 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7686 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7687 !VD || VD->hasLocalStorage()) 7688 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7689 else 7690 FirstPointerInComplexData = true; 7691 ++I; 7692 } 7693 } 7694 7695 // Track whether a component of the list should be marked as MEMBER_OF some 7696 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7697 // in a component list should be marked as MEMBER_OF, all subsequent entries 7698 // do not belong to the base struct. E.g. 7699 // struct S2 s; 7700 // s.ps->ps->ps->f[:] 7701 // (1) (2) (3) (4) 7702 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7703 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7704 // is the pointee of ps(2) which is not member of struct s, so it should not 7705 // be marked as such (it is still PTR_AND_OBJ). 7706 // The variable is initialized to false so that PTR_AND_OBJ entries which 7707 // are not struct members are not considered (e.g. array of pointers to 7708 // data). 7709 bool ShouldBeMemberOf = false; 7710 7711 // Variable keeping track of whether or not we have encountered a component 7712 // in the component list which is a member expression. 
Useful when we have a 7713 // pointer or a final array section, in which case it is the previous 7714 // component in the list which tells us whether we have a member expression. 7715 // E.g. X.f[:] 7716 // While processing the final array section "[:]" it is "f" which tells us 7717 // whether we are dealing with a member of a declared struct. 7718 const MemberExpr *EncounteredME = nullptr; 7719 7720 // Track for the total number of dimension. Start from one for the dummy 7721 // dimension. 7722 uint64_t DimSize = 1; 7723 7724 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7725 bool IsPrevMemberReference = false; 7726 7727 for (; I != CE; ++I) { 7728 // If the current component is member of a struct (parent struct) mark it. 7729 if (!EncounteredME) { 7730 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7731 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7732 // as MEMBER_OF the parent struct. 7733 if (EncounteredME) { 7734 ShouldBeMemberOf = true; 7735 // Do not emit as complex pointer if this is actually not array-like 7736 // expression. 7737 if (FirstPointerInComplexData) { 7738 QualType Ty = std::prev(I) 7739 ->getAssociatedDeclaration() 7740 ->getType() 7741 .getNonReferenceType(); 7742 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7743 FirstPointerInComplexData = false; 7744 } 7745 } 7746 } 7747 7748 auto Next = std::next(I); 7749 7750 // We need to generate the addresses and sizes if this is the last 7751 // component, if the component is a pointer or if it is an array section 7752 // whose length can't be proved to be one. If this is a pointer, it 7753 // becomes the base address for the following components. 7754 7755 // A final array section, is one whose length can't be proved to be one. 7756 // If the map item is non-contiguous then we don't treat any array section 7757 // as final array section. 
7758 bool IsFinalArraySection = 7759 !IsNonContiguous && 7760 isFinalArraySectionExpression(I->getAssociatedExpression()); 7761 7762 // If we have a declaration for the mapping use that, otherwise use 7763 // the base declaration of the map clause. 7764 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7765 ? I->getAssociatedDeclaration() 7766 : BaseDecl; 7767 7768 // Get information on whether the element is a pointer. Have to do a 7769 // special treatment for array sections given that they are built-in 7770 // types. 7771 const auto *OASE = 7772 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7773 const auto *OAShE = 7774 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7775 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7776 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7777 bool IsPointer = 7778 OAShE || 7779 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7780 .getCanonicalType() 7781 ->isAnyPointerType()) || 7782 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7783 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7784 MapDecl && 7785 MapDecl->getType()->isLValueReferenceType(); 7786 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7787 7788 if (OASE) 7789 ++DimSize; 7790 7791 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7792 IsFinalArraySection) { 7793 // If this is not the last component, we expect the pointer to be 7794 // associated with an array expression or member expression. 
7795 assert((Next == CE || 7796 isa<MemberExpr>(Next->getAssociatedExpression()) || 7797 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7798 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7799 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7800 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7801 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7802 "Unexpected expression"); 7803 7804 Address LB = Address::invalid(); 7805 Address LowestElem = Address::invalid(); 7806 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7807 const MemberExpr *E) { 7808 const Expr *BaseExpr = E->getBase(); 7809 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7810 // scalar. 7811 LValue BaseLV; 7812 if (E->isArrow()) { 7813 LValueBaseInfo BaseInfo; 7814 TBAAAccessInfo TBAAInfo; 7815 Address Addr = 7816 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7817 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7818 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7819 } else { 7820 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7821 } 7822 return BaseLV; 7823 }; 7824 if (OAShE) { 7825 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7826 CGF.getContext().getTypeAlignInChars( 7827 OAShE->getBase()->getType())); 7828 } else if (IsMemberReference) { 7829 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7830 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7831 LowestElem = CGF.EmitLValueForFieldInitialization( 7832 BaseLVal, cast<FieldDecl>(MapDecl)) 7833 .getAddress(CGF); 7834 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7835 .getAddress(CGF); 7836 } else { 7837 LowestElem = LB = 7838 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7839 .getAddress(CGF); 7840 } 7841 7842 // If this component is a pointer inside the base struct then we don't 7843 // need to create any entry for it - it will be combined with the object 7844 // it is 
pointing to into a single PTR_AND_OBJ entry. 7845 bool IsMemberPointerOrAddr = 7846 EncounteredME && 7847 (((IsPointer || ForDeviceAddr) && 7848 I->getAssociatedExpression() == EncounteredME) || 7849 (IsPrevMemberReference && !IsPointer) || 7850 (IsMemberReference && Next != CE && 7851 !Next->getAssociatedExpression()->getType()->isPointerType())); 7852 if (!OverlappedElements.empty() && Next == CE) { 7853 // Handle base element with the info for overlapped elements. 7854 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7855 assert(!IsPointer && 7856 "Unexpected base element with the pointer type."); 7857 // Mark the whole struct as the struct that requires allocation on the 7858 // device. 7859 PartialStruct.LowestElem = {0, LowestElem}; 7860 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7861 I->getAssociatedExpression()->getType()); 7862 Address HB = CGF.Builder.CreateConstGEP( 7863 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem, 7864 CGF.VoidPtrTy), 7865 TypeSize.getQuantity() - 1); 7866 PartialStruct.HighestElem = { 7867 std::numeric_limits<decltype( 7868 PartialStruct.HighestElem.first)>::max(), 7869 HB}; 7870 PartialStruct.Base = BP; 7871 PartialStruct.LB = LB; 7872 assert( 7873 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7874 "Overlapped elements must be used only once for the variable."); 7875 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7876 // Emit data for non-overlapped data. 7877 OpenMPOffloadMappingFlags Flags = 7878 OMP_MAP_MEMBER_OF | 7879 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7880 /*AddPtrFlag=*/false, 7881 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7882 llvm::Value *Size = nullptr; 7883 // Do bitcopy of all non-overlapped structure elements. 
7884 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7885 Component : OverlappedElements) { 7886 Address ComponentLB = Address::invalid(); 7887 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7888 Component) { 7889 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7890 const auto *FD = dyn_cast<FieldDecl>(VD); 7891 if (FD && FD->getType()->isLValueReferenceType()) { 7892 const auto *ME = 7893 cast<MemberExpr>(MC.getAssociatedExpression()); 7894 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7895 ComponentLB = 7896 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7897 .getAddress(CGF); 7898 } else { 7899 ComponentLB = 7900 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7901 .getAddress(CGF); 7902 } 7903 Size = CGF.Builder.CreatePtrDiff( 7904 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7905 CGF.EmitCastToVoidPtr(LB.getPointer())); 7906 break; 7907 } 7908 } 7909 assert(Size && "Failed to determine structure size"); 7910 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7911 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7912 CombinedInfo.Pointers.push_back(LB.getPointer()); 7913 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7914 Size, CGF.Int64Ty, /*isSigned=*/true)); 7915 CombinedInfo.Types.push_back(Flags); 7916 CombinedInfo.Mappers.push_back(nullptr); 7917 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7918 : 1); 7919 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7920 } 7921 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7922 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7923 CombinedInfo.Pointers.push_back(LB.getPointer()); 7924 Size = CGF.Builder.CreatePtrDiff( 7925 CGF.Builder.CreateConstGEP(HB, 1).getPointer(), 7926 CGF.EmitCastToVoidPtr(LB.getPointer())); 7927 CombinedInfo.Sizes.push_back( 7928 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7929 CombinedInfo.Types.push_back(Flags); 7930 CombinedInfo.Mappers.push_back(nullptr); 7931 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7932 : 1); 7933 break; 7934 } 7935 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7936 if (!IsMemberPointerOrAddr || 7937 (Next == CE && MapType != OMPC_MAP_unknown)) { 7938 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7939 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7940 CombinedInfo.Pointers.push_back(LB.getPointer()); 7941 CombinedInfo.Sizes.push_back( 7942 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7943 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7944 : 1); 7945 7946 // If Mapper is valid, the last component inherits the mapper. 7947 bool HasMapper = Mapper && Next == CE; 7948 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7949 7950 // We need to add a pointer flag for each map that comes from the 7951 // same expression except for the first one. We also need to signal 7952 // this map is the first one that relates with the current capture 7953 // (there is a set of entries for each capture). 
7954 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7955 MapType, MapModifiers, MotionModifiers, IsImplicit, 7956 !IsExpressionFirstInfo || RequiresReference || 7957 FirstPointerInComplexData || IsMemberReference, 7958 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7959 7960 if (!IsExpressionFirstInfo || IsMemberReference) { 7961 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7962 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7963 if (IsPointer || (IsMemberReference && Next != CE)) 7964 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7965 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7966 7967 if (ShouldBeMemberOf) { 7968 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7969 // should be later updated with the correct value of MEMBER_OF. 7970 Flags |= OMP_MAP_MEMBER_OF; 7971 // From now on, all subsequent PTR_AND_OBJ entries should not be 7972 // marked as MEMBER_OF. 7973 ShouldBeMemberOf = false; 7974 } 7975 } 7976 7977 CombinedInfo.Types.push_back(Flags); 7978 } 7979 7980 // If we have encountered a member expression so far, keep track of the 7981 // mapped member. If the parent is "*this", then the value declaration 7982 // is nullptr. 
7983 if (EncounteredME) { 7984 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7985 unsigned FieldIndex = FD->getFieldIndex(); 7986 7987 // Update info about the lowest and highest elements for this struct 7988 if (!PartialStruct.Base.isValid()) { 7989 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7990 if (IsFinalArraySection) { 7991 Address HB = 7992 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7993 .getAddress(CGF); 7994 PartialStruct.HighestElem = {FieldIndex, HB}; 7995 } else { 7996 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7997 } 7998 PartialStruct.Base = BP; 7999 PartialStruct.LB = BP; 8000 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8001 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 8002 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8003 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 8004 } 8005 } 8006 8007 // Need to emit combined struct for array sections. 8008 if (IsFinalArraySection || IsNonContiguous) 8009 PartialStruct.IsArraySection = true; 8010 8011 // If we have a final array section, we are done with this expression. 8012 if (IsFinalArraySection) 8013 break; 8014 8015 // The pointer becomes the base for the next element. 8016 if (Next != CE) 8017 BP = IsMemberReference ? LowestElem : LB; 8018 8019 IsExpressionFirstInfo = false; 8020 IsCaptureFirstInfo = false; 8021 FirstPointerInComplexData = false; 8022 IsPrevMemberReference = IsMemberReference; 8023 } else if (FirstPointerInComplexData) { 8024 QualType Ty = Components.rbegin() 8025 ->getAssociatedDeclaration() 8026 ->getType() 8027 .getNonReferenceType(); 8028 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8029 FirstPointerInComplexData = false; 8030 } 8031 } 8032 // If ran into the whole component - allocate the space for the whole 8033 // record. 
8034 if (!EncounteredME) 8035 PartialStruct.HasCompleteRecord = true; 8036 8037 if (!IsNonContiguous) 8038 return; 8039 8040 const ASTContext &Context = CGF.getContext(); 8041 8042 // For supporting stride in array section, we need to initialize the first 8043 // dimension size as 1, first offset as 0, and first count as 1 8044 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 8045 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8046 MapValuesArrayTy CurStrides; 8047 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 8048 uint64_t ElementTypeSize; 8049 8050 // Collect Size information for each dimension and get the element size as 8051 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 8052 // should be [10, 10] and the first stride is 4 btyes. 8053 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8054 Components) { 8055 const Expr *AssocExpr = Component.getAssociatedExpression(); 8056 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8057 8058 if (!OASE) 8059 continue; 8060 8061 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 8062 auto *CAT = Context.getAsConstantArrayType(Ty); 8063 auto *VAT = Context.getAsVariableArrayType(Ty); 8064 8065 // We need all the dimension size except for the last dimension. 8066 assert((VAT || CAT || &Component == &*Components.begin()) && 8067 "Should be either ConstantArray or VariableArray if not the " 8068 "first Component"); 8069 8070 // Get element size if CurStrides is empty. 
8071 if (CurStrides.empty()) { 8072 const Type *ElementType = nullptr; 8073 if (CAT) 8074 ElementType = CAT->getElementType().getTypePtr(); 8075 else if (VAT) 8076 ElementType = VAT->getElementType().getTypePtr(); 8077 else 8078 assert(&Component == &*Components.begin() && 8079 "Only expect pointer (non CAT or VAT) when this is the " 8080 "first Component"); 8081 // If ElementType is null, then it means the base is a pointer 8082 // (neither CAT nor VAT) and we'll attempt to get ElementType again 8083 // for next iteration. 8084 if (ElementType) { 8085 // For the case that having pointer as base, we need to remove one 8086 // level of indirection. 8087 if (&Component != &*Components.begin()) 8088 ElementType = ElementType->getPointeeOrArrayElementType(); 8089 ElementTypeSize = 8090 Context.getTypeSizeInChars(ElementType).getQuantity(); 8091 CurStrides.push_back( 8092 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 8093 } 8094 } 8095 // Get dimension value except for the last dimension since we don't need 8096 // it. 8097 if (DimSizes.size() < Components.size() - 1) { 8098 if (CAT) 8099 DimSizes.push_back(llvm::ConstantInt::get( 8100 CGF.Int64Ty, CAT->getSize().getZExtValue())); 8101 else if (VAT) 8102 DimSizes.push_back(CGF.Builder.CreateIntCast( 8103 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 8104 /*IsSigned=*/false)); 8105 } 8106 } 8107 8108 // Skip the dummy dimension since we have already have its information. 8109 auto DI = DimSizes.begin() + 1; 8110 // Product of dimension. 8111 llvm::Value *DimProd = 8112 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 8113 8114 // Collect info for non-contiguous. Notice that offset, count, and stride 8115 // are only meaningful for array-section, so we insert a null for anything 8116 // other than array-section. 8117 // Also, the size of offset, count, and stride are not the same as 8118 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8119 // count, and stride are the same as the number of non-contiguous 8120 // declaration in target update to/from clause. 8121 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8122 Components) { 8123 const Expr *AssocExpr = Component.getAssociatedExpression(); 8124 8125 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8126 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8127 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8128 /*isSigned=*/false); 8129 CurOffsets.push_back(Offset); 8130 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8131 CurStrides.push_back(CurStrides.back()); 8132 continue; 8133 } 8134 8135 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8136 8137 if (!OASE) 8138 continue; 8139 8140 // Offset 8141 const Expr *OffsetExpr = OASE->getLowerBound(); 8142 llvm::Value *Offset = nullptr; 8143 if (!OffsetExpr) { 8144 // If offset is absent, then we just set it to zero. 8145 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8146 } else { 8147 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8148 CGF.Int64Ty, 8149 /*isSigned=*/false); 8150 } 8151 CurOffsets.push_back(Offset); 8152 8153 // Count 8154 const Expr *CountExpr = OASE->getLength(); 8155 llvm::Value *Count = nullptr; 8156 if (!CountExpr) { 8157 // In Clang, once a high dimension is an array section, we construct all 8158 // the lower dimension as array section, however, for case like 8159 // arr[0:2][2], Clang construct the inner dimension as an array section 8160 // but it actually is not in an array section form according to spec. 8161 if (!OASE->getColonLocFirst().isValid() && 8162 !OASE->getColonLocSecond().isValid()) { 8163 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8164 } else { 8165 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8166 // When the length is absent it defaults to ⌈(size − 8167 // lower-bound)/stride⌉, where size is the size of the array 8168 // dimension. 8169 const Expr *StrideExpr = OASE->getStride(); 8170 llvm::Value *Stride = 8171 StrideExpr 8172 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8173 CGF.Int64Ty, /*isSigned=*/false) 8174 : nullptr; 8175 if (Stride) 8176 Count = CGF.Builder.CreateUDiv( 8177 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8178 else 8179 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8180 } 8181 } else { 8182 Count = CGF.EmitScalarExpr(CountExpr); 8183 } 8184 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8185 CurCounts.push_back(Count); 8186 8187 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8188 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8189 // Offset Count Stride 8190 // D0 0 1 4 (int) <- dummy dimension 8191 // D1 0 2 8 (2 * (1) * 4) 8192 // D2 1 2 20 (1 * (1 * 5) * 4) 8193 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8194 const Expr *StrideExpr = OASE->getStride(); 8195 llvm::Value *Stride = 8196 StrideExpr 8197 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8198 CGF.Int64Ty, /*isSigned=*/false) 8199 : nullptr; 8200 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8201 if (Stride) 8202 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8203 else 8204 CurStrides.push_back(DimProd); 8205 if (DI != DimSizes.end()) 8206 ++DI; 8207 } 8208 8209 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8210 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8211 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8212 } 8213 8214 /// Return the adjusted map modifiers if the declaration a capture refers to 8215 /// appears in a first-private clause. This is expected to be used only with 8216 /// directives that start with 'target'. 
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // const, by-ref captures only need to be copied to the device.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // Pointers are mapped as pointer-with-pointee.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not first-private: default tofrom mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Return the MEMBER_OF flag encoding for a parent entry at index
  /// \p Position (the 1-based parent position shifted into the MEMBER_OF
  /// bits of the map-type flags).
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with the concrete
  /// \p MemberOfFlag value, leaving PTR_AND_OBJ entries that were never
  /// marked with the placeholder untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the layout of \p RD into \p Layout: recurses into non-virtual
  /// and virtual bases and appends their fields plus RD's own non-bitfield,
  /// non-zero-size fields, ordered by LLVM field index. \p AsBase selects the
  /// base-subobject LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field, filled in below.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Slot may already be taken by a non-virtual base; keep the first.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout: recurse into bases, append fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect component lists from 'map' clauses, bucketed by MapKind.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The expression is not available if the clause is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect component lists from 'to' clauses (mapped as OMPC_MAP_to).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect component lists from 'from' clauses (mapped as OMPC_MAP_from).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Handle each declaration at most once.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected information, one declaration at a time.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a struct member nor an array section
    // does not need a combined (parent) entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      // For a fully mapped record both bounds collapse to the record base;
      // the {highest+1}-minus-lowest size below then covers one whole record.
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // just pushed is the target parameter now.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // Only valid when this handler was built from an executable directive
    // (see the declare-mapper constructor for the other mode).
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    // Only valid when this handler was built from a declare-mapper directive.
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// Each by-reference (or pointer) capture of the lambda object \p VD gets a
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry; \p LambdaPointers records
  /// field-address -> lambda-address so member_of indices can be fixed later
  /// by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured entity is a lambda closure object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer through the closure field.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // By-value captures are only mapped when they hold a pointer.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by value: map the pointee pointer, size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambdas captures.
  /// Rewrites the MEMBER_OF field of every implicit lambda-capture entry so it
  /// points at the index of its enclosing lambda entry (found via
  /// \p LambdaPointers, which was filled by generateInfoForLambdaCaptures).
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with this exact flag combination were emitted by
      // generateInfoForLambdaCaptures; skip everything else.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the closest preceding entry whose pointer is the
      // lambda object itself; that entry is the parent.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that mentions this declaration,
    // together with its map type, modifiers, implicitness, mapper and the
    // originating expression (null for implicit maps).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort so entries with the 'present' modifier (and 'alloc' maps)
    // are ordered ahead of the others.
    // NOTE(review): MapType is first read from RHS and the modifiers from LHS,
    // then the two are swapped below — so HasAllocs reflects RHS while
    // HasAllocsR reflects LHS, i.e. the 'alloc' ordering appears inverted
    // relative to the 'present' ordering. Confirm this crossing is intended.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list; walking component lists from the
      // back (base towards leaf) detects shared prefixes.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter list is the base; the longer one is recorded as an
          // overlapped sub-element of that base.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type whose
      // field layout orders the overlapped elements.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by declaration order of the diverging fields.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: materialize a global copy once and map that
        // instead of the stack location.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: pass the pointee address, not the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

/// Emit, for every non-contiguous mapped entry, a stack array of
/// descriptor_dim records (offset/count/stride per dimension) and store its
/// address into the corresponding slot of the offload pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored innermost-first in NonContigInfo but emitted
      // outermost-first, hence the reversed index.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    // No declaration available: fall back to the builder's default location.
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    // Prefer the pretty-printed map expression; otherwise use the declared
    // name.
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }
  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the per-entry base pointers, pointers and mapper
    // functions; they are filled in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Populate the runtime-filled arrays entry by entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device-pointer capture lives so the caller can
      // privatize it (use_device_ptr support).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // When true, emit the map-type array meant for the region-end runtime call.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element (&array[0]), which is
    // the form the libomptarget entry points expect.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the region-end call prefer the dedicated end map-type array, which
    // exists only when it differs from the begin one (e.g. 'present' bits had
    // to be dropped).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // The map-names array (debug aid) is only emitted when debug information
    // is requested; otherwise the runtime receives a null names array.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No map entries at all: every argument is a typed null pointer.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for inner distribute directive.
9568 static const OMPExecutableDirective * 9569 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9570 const auto *CS = D.getInnermostCapturedStmt(); 9571 const auto *Body = 9572 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9573 const Stmt *ChildStmt = 9574 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9575 9576 if (const auto *NestedDir = 9577 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9578 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9579 switch (D.getDirectiveKind()) { 9580 case OMPD_target: 9581 if (isOpenMPDistributeDirective(DKind)) 9582 return NestedDir; 9583 if (DKind == OMPD_teams) { 9584 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9585 /*IgnoreCaptured=*/true); 9586 if (!Body) 9587 return nullptr; 9588 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9589 if (const auto *NND = 9590 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9591 DKind = NND->getDirectiveKind(); 9592 if (isOpenMPDistributeDirective(DKind)) 9593 return NND; 9594 } 9595 } 9596 return nullptr; 9597 case OMPD_target_teams: 9598 if (isOpenMPDistributeDirective(DKind)) 9599 return NestedDir; 9600 return nullptr; 9601 case OMPD_target_parallel: 9602 case OMPD_target_simd: 9603 case OMPD_target_parallel_for: 9604 case OMPD_target_parallel_for_simd: 9605 return nullptr; 9606 case OMPD_target_teams_distribute: 9607 case OMPD_target_teams_distribute_simd: 9608 case OMPD_target_teams_distribute_parallel_for: 9609 case OMPD_target_teams_distribute_parallel_for_simd: 9610 case OMPD_parallel: 9611 case OMPD_for: 9612 case OMPD_parallel_for: 9613 case OMPD_parallel_master: 9614 case OMPD_parallel_sections: 9615 case OMPD_for_simd: 9616 case OMPD_parallel_for_simd: 9617 case OMPD_cancel: 9618 case OMPD_cancellation_point: 9619 case OMPD_ordered: 9620 case OMPD_threadprivate: 9621 case OMPD_allocate: 9622 case OMPD_task: 9623 case OMPD_simd: 9624 case OMPD_tile: 9625 
case OMPD_sections: 9626 case OMPD_section: 9627 case OMPD_single: 9628 case OMPD_master: 9629 case OMPD_critical: 9630 case OMPD_taskyield: 9631 case OMPD_barrier: 9632 case OMPD_taskwait: 9633 case OMPD_taskgroup: 9634 case OMPD_atomic: 9635 case OMPD_flush: 9636 case OMPD_depobj: 9637 case OMPD_scan: 9638 case OMPD_teams: 9639 case OMPD_target_data: 9640 case OMPD_target_exit_data: 9641 case OMPD_target_enter_data: 9642 case OMPD_distribute: 9643 case OMPD_distribute_simd: 9644 case OMPD_distribute_parallel_for: 9645 case OMPD_distribute_parallel_for_simd: 9646 case OMPD_teams_distribute: 9647 case OMPD_teams_distribute_simd: 9648 case OMPD_teams_distribute_parallel_for: 9649 case OMPD_teams_distribute_parallel_for_simd: 9650 case OMPD_target_update: 9651 case OMPD_declare_simd: 9652 case OMPD_declare_variant: 9653 case OMPD_begin_declare_variant: 9654 case OMPD_end_declare_variant: 9655 case OMPD_declare_target: 9656 case OMPD_end_declare_target: 9657 case OMPD_declare_reduction: 9658 case OMPD_declare_mapper: 9659 case OMPD_taskloop: 9660 case OMPD_taskloop_simd: 9661 case OMPD_master_taskloop: 9662 case OMPD_master_taskloop_simd: 9663 case OMPD_parallel_master_taskloop: 9664 case OMPD_parallel_master_taskloop_simd: 9665 case OMPD_requires: 9666 case OMPD_unknown: 9667 default: 9668 llvm_unreachable("Unexpected directive."); 9669 } 9670 } 9671 9672 return nullptr; 9673 } 9674 9675 /// Emit the user-defined mapper function. The code generation follows the 9676 /// pattern in the example below. 9677 /// \code 9678 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9679 /// void *base, void *begin, 9680 /// int64_t size, int64_t type, 9681 /// void *name = nullptr) { 9682 /// // Allocate space for an array section first or add a base/begin for 9683 /// // pointer dereference. 
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper is emitted at most once; later requests reuse the cached
  // function via UDMMap.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when the TU is built at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin on entry, PtrNext on each
  // back edge (the second incoming value is added after the loop body below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Move the component count into the MEMBER_OF bit-field position so it can
  // be added onto each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Map names are a debugging aid; only materialized with debug info.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decay outcomes (tofrom arrives via ToElseBB fallthrough).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; getOrCreateUserDefinedMapperFunc relies on it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init runs when (size > 1 || (base != begin && PTR_AND_OBJ)) and the
    // DELETE bit is NOT set.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only for a real array section (size > 1) with the DELETE
    // bit set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function generated for \p D, emitting it first if it has
/// not been generated yet (see emitUserDefinedMapper, which populates UDMMap).
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// If \p D is (or wraps) a teams-distribute loop directive, emit a call to
/// __kmpc_push_target_tripcount_mapper with the loop trip count computed by
/// \p SizeEmitter so the runtime knows the iteration space for \p DeviceID.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
10041 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 10042 TD = getNestedDistributeDirective(CGM.getContext(), D); 10043 if (!TD) 10044 return; 10045 const auto *LD = cast<OMPLoopDirective>(TD); 10046 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, 10047 PrePostActionTy &) { 10048 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 10049 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10050 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; 10051 CGF.EmitRuntimeCall( 10052 OMPBuilder.getOrCreateRuntimeFunction( 10053 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), 10054 Args); 10055 } 10056 }; 10057 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 10058 } 10059 10060 void CGOpenMPRuntime::emitTargetCall( 10061 CodeGenFunction &CGF, const OMPExecutableDirective &D, 10062 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 10063 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 10064 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 10065 const OMPLoopDirective &D)> 10066 SizeEmitter) { 10067 if (!CGF.HaveInsertPoint()) 10068 return; 10069 10070 assert(OutlinedFn && "Invalid outlined function!"); 10071 10072 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 10073 D.hasClausesOfKind<OMPNowaitClause>(); 10074 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 10075 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 10076 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 10077 PrePostActionTy &) { 10078 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10079 }; 10080 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 10081 10082 CodeGenFunction::OMPTargetDataInfo InputInfo; 10083 llvm::Value *MapTypesArray = nullptr; 10084 llvm::Value *MapNamesArray = nullptr; 10085 // Fill up the pointer arrays and transfer execution to the device. 
10086 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 10087 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, 10088 &CapturedVars, 10089 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 10090 if (Device.getInt() == OMPC_DEVICE_ancestor) { 10091 // Reverse offloading is not supported, so just execute on the host. 10092 if (RequiresOuterTask) { 10093 CapturedVars.clear(); 10094 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10095 } 10096 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10097 return; 10098 } 10099 10100 // On top of the arrays that were filled up, the target offloading call 10101 // takes as arguments the device id as well as the host pointer. The host 10102 // pointer is used by the runtime library to identify the current target 10103 // region, so it only has to be unique and not necessarily point to 10104 // anything. It could be the pointer to the outlined function that 10105 // implements the target region, but we aren't using that so that the 10106 // compiler doesn't need to keep that, and could therefore inline the host 10107 // function if proven worthwhile during optimization. 10108 10109 // From this point on, we need to have an ID of the target region defined. 10110 assert(OutlinedFnID && "Invalid outlined function ID!"); 10111 10112 // Emit device ID if any. 10113 llvm::Value *DeviceID; 10114 if (Device.getPointer()) { 10115 assert((Device.getInt() == OMPC_DEVICE_unknown || 10116 Device.getInt() == OMPC_DEVICE_device_num) && 10117 "Expected device_num modifier."); 10118 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10119 DeviceID = 10120 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10121 } else { 10122 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10123 } 10124 10125 // Emit the number of elements in the offloading arrays. 
10126 llvm::Value *PointerNum = 10127 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10128 10129 // Return value of the runtime offloading call. 10130 llvm::Value *Return; 10131 10132 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10133 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10134 10135 // Source location for the ident struct 10136 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10137 10138 // Emit tripcount for the target loop-based directive. 10139 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10140 10141 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10142 // The target region is an outlined function launched by the runtime 10143 // via calls __tgt_target() or __tgt_target_teams(). 10144 // 10145 // __tgt_target() launches a target region with one team and one thread, 10146 // executing a serial region. This master thread may in turn launch 10147 // more threads within its team upon encountering a parallel region, 10148 // however, no additional teams can be launched on the device. 10149 // 10150 // __tgt_target_teams() launches a target region with one or more teams, 10151 // each with one or more threads. This call is required for target 10152 // constructs such as: 10153 // 'target teams' 10154 // 'target' / 'teams' 10155 // 'target teams distribute parallel for' 10156 // 'target parallel' 10157 // and so on. 10158 // 10159 // Note that on the host and CPU targets, the runtime implementation of 10160 // these calls simply call the outlined function without forking threads. 10161 // The outlined functions themselves have runtime calls to 10162 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 10163 // the compiler in emitTeamsCall() and emitParallelCall(). 
10164 // 10165 // In contrast, on the NVPTX target, the implementation of 10166 // __tgt_target_teams() launches a GPU kernel with the requested number 10167 // of teams and threads so no additional calls to the runtime are required. 10168 if (NumTeams) { 10169 // If we have NumTeams defined this means that we have an enclosed teams 10170 // region. Therefore we also expect to have NumThreads defined. These two 10171 // values should be defined in the presence of a teams directive, 10172 // regardless of having any clauses associated. If the user is using teams 10173 // but no clauses, these two values will be the default that should be 10174 // passed to the runtime library - a 32-bit integer with the value zero. 10175 assert(NumThreads && "Thread limit expression should be available along " 10176 "with number of teams."); 10177 llvm::Value *OffloadingArgs[] = {RTLoc, 10178 DeviceID, 10179 OutlinedFnID, 10180 PointerNum, 10181 InputInfo.BasePointersArray.getPointer(), 10182 InputInfo.PointersArray.getPointer(), 10183 InputInfo.SizesArray.getPointer(), 10184 MapTypesArray, 10185 MapNamesArray, 10186 InputInfo.MappersArray.getPointer(), 10187 NumTeams, 10188 NumThreads}; 10189 Return = CGF.EmitRuntimeCall( 10190 OMPBuilder.getOrCreateRuntimeFunction( 10191 CGM.getModule(), HasNowait 10192 ? OMPRTL___tgt_target_teams_nowait_mapper 10193 : OMPRTL___tgt_target_teams_mapper), 10194 OffloadingArgs); 10195 } else { 10196 llvm::Value *OffloadingArgs[] = {RTLoc, 10197 DeviceID, 10198 OutlinedFnID, 10199 PointerNum, 10200 InputInfo.BasePointersArray.getPointer(), 10201 InputInfo.PointersArray.getPointer(), 10202 InputInfo.SizesArray.getPointer(), 10203 MapTypesArray, 10204 MapNamesArray, 10205 InputInfo.MappersArray.getPointer()}; 10206 Return = CGF.EmitRuntimeCall( 10207 OMPBuilder.getOrCreateRuntimeFunction( 10208 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper 10209 : OMPRTL___tgt_target_mapper), 10210 OffloadingArgs); 10211 } 10212 10213 // Check the error code and execute the host version if required. 10214 llvm::BasicBlock *OffloadFailedBlock = 10215 CGF.createBasicBlock("omp_offload.failed"); 10216 llvm::BasicBlock *OffloadContBlock = 10217 CGF.createBasicBlock("omp_offload.cont"); 10218 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10219 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10220 10221 CGF.EmitBlock(OffloadFailedBlock); 10222 if (RequiresOuterTask) { 10223 CapturedVars.clear(); 10224 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10225 } 10226 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10227 CGF.EmitBranch(OffloadContBlock); 10228 10229 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10230 }; 10231 10232 // Notify that the host version must be executed. 10233 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10234 RequiresOuterTask](CodeGenFunction &CGF, 10235 PrePostActionTy &) { 10236 if (RequiresOuterTask) { 10237 CapturedVars.clear(); 10238 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10239 } 10240 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10241 }; 10242 10243 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10244 &MapNamesArray, &CapturedVars, RequiresOuterTask, 10245 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10246 // Fill up the arrays with all the captured variables. 10247 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10248 10249 // Get mappable expression information. 
10250 MappableExprsHandler MEHandler(D, CGF); 10251 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10252 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 10253 10254 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10255 auto *CV = CapturedVars.begin(); 10256 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10257 CE = CS.capture_end(); 10258 CI != CE; ++CI, ++RI, ++CV) { 10259 MappableExprsHandler::MapCombinedInfoTy CurInfo; 10260 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10261 10262 // VLA sizes are passed to the outlined region by copy and do not have map 10263 // information associated. 10264 if (CI->capturesVariableArrayType()) { 10265 CurInfo.Exprs.push_back(nullptr); 10266 CurInfo.BasePointers.push_back(*CV); 10267 CurInfo.Pointers.push_back(*CV); 10268 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 10269 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10270 // Copy to the device as an argument. No need to retrieve it. 10271 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10272 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10273 MappableExprsHandler::OMP_MAP_IMPLICIT); 10274 CurInfo.Mappers.push_back(nullptr); 10275 } else { 10276 // If we have any information in the map clause, we use it, otherwise we 10277 // just do a default mapping. 10278 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 10279 if (!CI->capturesThis()) 10280 MappedVarSet.insert(CI->getCapturedVar()); 10281 else 10282 MappedVarSet.insert(nullptr); 10283 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) 10284 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 10285 // Generate correct mapping for variables captured by reference in 10286 // lambdas. 
10287 if (CI->capturesVariable()) 10288 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 10289 CurInfo, LambdaPointers); 10290 } 10291 // We expect to have at least an element of information for this capture. 10292 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 10293 "Non-existing map pointer for capture!"); 10294 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 10295 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 10296 CurInfo.BasePointers.size() == CurInfo.Types.size() && 10297 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 10298 "Inconsistent map information sizes!"); 10299 10300 // If there is an entry in PartialStruct it means we have a struct with 10301 // individual members mapped. Emit an extra combined entry. 10302 if (PartialStruct.Base.isValid()) { 10303 CombinedInfo.append(PartialStruct.PreliminaryMapData); 10304 MEHandler.emitCombinedEntry( 10305 CombinedInfo, CurInfo.Types, PartialStruct, nullptr, 10306 !PartialStruct.PreliminaryMapData.BasePointers.empty()); 10307 } 10308 10309 // We need to append the results of this capture to what we already have. 10310 CombinedInfo.append(CurInfo); 10311 } 10312 // Adjust MEMBER_OF flags for the lambdas captures. 10313 MEHandler.adjustMemberOfForLambdaCaptures( 10314 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 10315 CombinedInfo.Types); 10316 // Map any list items in a map clause that were not captures because they 10317 // weren't referenced within the construct. 10318 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 10319 10320 TargetDataInfo Info; 10321 // Fill up the arrays and create the arguments. 
10322 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); 10323 emitOffloadingArraysArgument( 10324 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 10325 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 10326 {/*ForEndTask=*/false}); 10327 10328 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10329 InputInfo.BasePointersArray = 10330 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10331 InputInfo.PointersArray = 10332 Address(Info.PointersArray, CGM.getPointerAlign()); 10333 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10334 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10335 MapTypesArray = Info.MapTypesArray; 10336 MapNamesArray = Info.MapNamesArray; 10337 if (RequiresOuterTask) 10338 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10339 else 10340 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10341 }; 10342 10343 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10344 CodeGenFunction &CGF, PrePostActionTy &) { 10345 if (RequiresOuterTask) { 10346 CodeGenFunction::OMPTargetDataInfo InputInfo; 10347 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10348 } else { 10349 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10350 } 10351 }; 10352 10353 // If we have a target function ID it means that we need to support 10354 // offloading, otherwise, just execute on the host. We need to execute on host 10355 // regardless of the conditional in the if clause if, e.g., the user do not 10356 // specify target triples. 
  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

// Recursively walk \p S looking for OpenMP target execution directives and
// emit the device function for each target region found. \p ParentName is the
// mangled name of the enclosing host function; it participates in the unique
// name of each target region entry (device-id/file-id/parent/line).
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-side emitter for the specific combined construct.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (RequiresDeviceCodegen filtered them out above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

// Returns true if the global \p GD must NOT be emitted through the normal
// path (it was either handled here or must be skipped for this compilation
// side), false if normal code generation should proceed.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

// Returns true if the variable \p GD must NOT be emitted through the normal
// path on the device: either its emission is deferred (declare target
// link/to-with-USM) or it is handled here; false means emit normally.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // Defer emission: these variables are emitted (or get a reference
    // indirection) at the end of the TU via emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

// Creates (or reuses) an internal global holding the device copy of a
// constant firstprivate variable \p VD, registers it as a device global var
// offload entry, and returns its address. The name encodes device-id,
// file-id, variable name and line so host and device agree on the entry.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the variable alive through optimizations; the runtime looks it up by
  // name via the offload entries table.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

// Registers \p VD (already emitted at \p Addr) in the offload entries table
// according to its declare target map type (to/link), or records it as a
// non-target variable emitted in device code.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size zero tells the runtime there is no definition
      // in this TU.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal constant "<name>_ref" pointing at the variable so the
      // optimizer cannot drop an internal declare-target variable the runtime
      // still needs to find.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host, the entry is the pointer-sized "decl target ref" proxy,
      // not the variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

// Route a global to the function or variable target-emission logic.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

// Emit the declare target variables whose emission was deferred by
// emitTargetGlobalVariable(): 'to' without unified shared memory is emitted
// directly; 'link' (or 'to' with USM) gets its reference proxy created.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

// Base implementation is a no-op (only validates the directive kind);
// target-specific runtimes override this to fix up lambda captures.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

// Record the effects of an 'omp requires' directive: unified shared memory
// and the default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

// Returns true (and sets \p AS) if \p VD carries an 'omp allocate' attribute
// with a predefined allocator; all predefined allocators currently map to the
// default address space.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

// RAII that temporarily disables the implicit "mark as declare target"
// behavior during device compilation; restores the previous state on exit.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

// Returns true if \p GD is already emitted (or should not be emitted now)
// for the device; records first-time decls in AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A body exists but has not been recorded yet: emitted iff the IR
      // function is already a definition.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert() returns false in 'second' if D was already present, i.e. already
  // emitted.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}

// Build the global constructor that calls __tgt_register_requires with the
// flags derived from 'omp requires' clauses. Returns nullptr when no
// registration is needed (no offloading, device side, or no target regions).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

// Emit the runtime call that forks the teams region: the outlined function is
// passed as a microtask together with all captured variables.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

// Emit __kmpc_push_num_teams for the num_teams/thread_limit clauses; a null
// expression means "no clause" and is passed to the runtime as zero.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ?
          CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                    CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

// Emit the paired __tgt_target_data_begin_mapper/__tgt_target_data_end_mapper
// calls for a 'target data' region, with the region body emitted in between.
// When device pointers are privatized the body is emitted twice (with and
// without privatization) inside the if/else of the 'if' clause.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

// Emit the runtime call for a standalone target data directive
// (target enter data / target exit data / target update).
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime choose the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are rejected by the assert at the top of
    // emitTargetDataStandAloneCall; they are enumerated here so the switch
    // stays exhaustive and newly added kinds surface as compiler warnings.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses, publish their addresses
  // through InputInfo, and then emit ThenGen — wrapped in an outer task when a
  // 'depend' or 'nowait' clause is present, inlined otherwise.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // The 'else' arm of the 'if' clause does nothing for these directives.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  // NOTE(review): RetType is known non-null here (checked above), so the
  // isNull() half of this condition is redundant.
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For instance methods, slot 0 of ParamAttrs describes 'this'.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Rule b): first non-uniform, non-linear (i.e. Vector) parameter.
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Rules c) and d): fall back to int.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emit x86 "_ZGV<isa><mask><vlen><parameters>_<name>" vector-variant
/// attributes on \p Fn — one per requested mask ('N'/'M') for each of the
/// SSE/AVX/AVX2/AVX512 ISAs.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    // No [not]inbranch clause: emit both masked and unmasked variants.
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No user simdlen: derive VLEN from the characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
11388 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11389 QT = QT.getCanonicalType(); 11390 11391 if (QT->isVoidType()) 11392 return false; 11393 11394 if (Kind == ParamKindTy::Uniform) 11395 return false; 11396 11397 if (Kind == ParamKindTy::Linear) 11398 return false; 11399 11400 // TODO: Handle linear references with modifiers 11401 11402 if (Kind == ParamKindTy::LinearWithVarStride) 11403 return false; 11404 11405 return true; 11406 } 11407 11408 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11409 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11410 QT = QT.getCanonicalType(); 11411 unsigned Size = C.getTypeSize(QT); 11412 11413 // Only scalars and complex within 16 bytes wide set PVB to true. 11414 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11415 return false; 11416 11417 if (QT->isFloatingType()) 11418 return true; 11419 11420 if (QT->isIntegerType()) 11421 return true; 11422 11423 if (QT->isPointerType()) 11424 return true; 11425 11426 // TODO: Add support for complex types (section 3.1.2, item 2). 11427 11428 return false; 11429 } 11430 11431 /// Computes the lane size (LS) of a return type or of an input parameter, 11432 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11433 /// TODO: Add support for references, section 3.2.1, item 1. 11434 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11435 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11436 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11437 if (getAArch64PBV(PTy, C)) 11438 return C.getTypeSize(PTy); 11439 } 11440 if (getAArch64PBV(QT, C)) 11441 return C.getTypeSize(QT); 11442 11443 return C.getTypeSize(C.getUIntPtrType()); 11444 } 11445 11446 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11447 // signature of the scalar function, as defined in 3.2.2 of the 11448 // AAVFABI. 
11449 static std::tuple<unsigned, unsigned, bool> 11450 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11451 QualType RetType = FD->getReturnType().getCanonicalType(); 11452 11453 ASTContext &C = FD->getASTContext(); 11454 11455 bool OutputBecomesInput = false; 11456 11457 llvm::SmallVector<unsigned, 8> Sizes; 11458 if (!RetType->isVoidType()) { 11459 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11460 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11461 OutputBecomesInput = true; 11462 } 11463 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11464 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11465 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11466 } 11467 11468 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11469 // The LS of a function parameter / return value can only be a power 11470 // of 2, starting from 8 bits, up to 128. 11471 assert(std::all_of(Sizes.begin(), Sizes.end(), 11472 [](unsigned Size) { 11473 return Size == 8 || Size == 16 || Size == 32 || 11474 Size == 64 || Size == 128; 11475 }) && 11476 "Invalid size"); 11477 11478 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11479 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11480 OutputBecomesInput); 11481 } 11482 11483 /// Mangle the parameter part of the vector function name according to 11484 /// their OpenMP classification. The mangling function is defined in 11485 /// section 3.5 of the AAVFABI. 11486 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11487 SmallString<256> Buffer; 11488 llvm::raw_svector_ostream Out(Buffer); 11489 for (const auto &ParamAttr : ParamAttrs) { 11490 switch (ParamAttr.Kind) { 11491 case LinearWithVarStride: 11492 Out << "ls" << ParamAttr.StrideOrArg; 11493 break; 11494 case Linear: 11495 Out << 'l'; 11496 // Don't print the step value if it is not present or if it is 11497 // equal to 1. 
11498 if (ParamAttr.StrideOrArg != 1) 11499 Out << ParamAttr.StrideOrArg; 11500 break; 11501 case Uniform: 11502 Out << 'u'; 11503 break; 11504 case Vector: 11505 Out << 'v'; 11506 break; 11507 } 11508 11509 if (!!ParamAttr.Alignment) 11510 Out << 'a' << ParamAttr.Alignment; 11511 } 11512 11513 return std::string(Out.str()); 11514 } 11515 11516 // Function used to add the attribute. The parameter `VLEN` is 11517 // templated to allow the use of "x" when targeting scalable functions 11518 // for SVE. 11519 template <typename T> 11520 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11521 char ISA, StringRef ParSeq, 11522 StringRef MangledName, bool OutputBecomesInput, 11523 llvm::Function *Fn) { 11524 SmallString<256> Buffer; 11525 llvm::raw_svector_ostream Out(Buffer); 11526 Out << Prefix << ISA << LMask << VLEN; 11527 if (OutputBecomesInput) 11528 Out << "v"; 11529 Out << ParSeq << "_" << MangledName; 11530 Fn->addFnAttr(Out.str()); 11531 } 11532 11533 // Helper function to generate the Advanced SIMD names depending on 11534 // the value of the NDS when simdlen is not present. 
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  // Each NDS yields the one or two VLENs that fill a 64/128-bit NEON register.
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Walk all redeclarations of \p FD and, for every 'declare simd' attribute,
/// classify the parameters (uniform/linear/aligned) and emit the matching
/// "_ZGV..." vector-variant attributes on \p Fn for the current target
/// (x86 or AArch64).
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For instance methods, slot 0 stands for the implicit 'this'.
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // An explicit 'aligned(x : n)' value wins; otherwise use the
        // OpenMP default SIMD alignment for the parameter's type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      // NOTE(review): MI is advanced in lock-step with SI but its value is
      // never read — linear modifiers are currently unhandled here.
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          // NOTE(review): this matches the as-written type only; a
          // typedef'd pointer type may not be recognized here — confirm
          // whether the canonical type should be inspected instead.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: record the position of the parameter
            // holding the variable stride.
            // NOTE(review): cast<> asserts on a type mismatch rather than
            // returning null, so these null-checks can never fail;
            // dyn_cast<> may have been intended — confirm.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emits the stored runtime call (the finalization function captured at
  // push time) when the enclosing scope is exited.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    // Cache the record type for subsequent doacross regions.
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim per loop dimension; 'lo' stays zero from the null init below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini on scope exit (normal and EH
  // paths) via the cleanup stack.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit the runtime call for an 'ordered depend' construct:
/// __kmpc_doacross_post for 'source' clauses and __kmpc_doacross_wait for
/// 'sink' clauses, passing the per-loop counter values widened to kmp_int64.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  // RAII: applies an artificial debug location for the duration of the
  // call emission.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  // Use the nounwind call variant when the callee is known not to throw.
  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Record that a 'declare target' function body has been emitted.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Base implementation: no translation, use the native parameter directly.
  return CGF.GetAddrOfLocalVar(NativeParam);
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If this variable was registered for the enclosing untied task, reuse
  // the addresses recorded there.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
11983 if (!isAllocatableDecl(VD)) 11984 return UntiedAddr; 11985 llvm::Value *Size; 11986 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11987 if (CVD->getType()->isVariablyModifiedType()) { 11988 Size = CGF.getTypeSize(CVD->getType()); 11989 // Align the size: ((size + align - 1) / align) * align 11990 Size = CGF.Builder.CreateNUWAdd( 11991 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11992 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11993 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11994 } else { 11995 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11996 Size = CGM.getSize(Sz.alignTo(Align)); 11997 } 11998 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11999 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12000 assert(AA->getAllocator() && 12001 "Expected allocator expression for non-default allocator."); 12002 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12003 // According to the standard, the original allocator type is a enum 12004 // (integer). Convert to pointer type, if required. 
12005 Allocator = CGF.EmitScalarConversion( 12006 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, 12007 AA->getAllocator()->getExprLoc()); 12008 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 12009 12010 llvm::Value *Addr = 12011 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 12012 CGM.getModule(), OMPRTL___kmpc_alloc), 12013 Args, getName({CVD->getName(), ".void.addr"})); 12014 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12015 CGM.getModule(), OMPRTL___kmpc_free); 12016 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 12017 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12018 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 12019 if (UntiedAddr.isValid()) 12020 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 12021 12022 // Cleanup action for allocate support. 12023 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 12024 llvm::FunctionCallee RTLFn; 12025 unsigned LocEncoding; 12026 Address Addr; 12027 const Expr *Allocator; 12028 12029 public: 12030 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, 12031 Address Addr, const Expr *Allocator) 12032 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 12033 Allocator(Allocator) {} 12034 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12035 if (!CGF.HaveInsertPoint()) 12036 return; 12037 llvm::Value *Args[3]; 12038 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 12039 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 12040 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12041 Addr.getPointer(), CGF.VoidPtrTy); 12042 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); 12043 // According to the standard, the original allocator type is a enum 12044 // (integer). Convert to pointer type, if required. 
12045 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), 12046 CGF.getContext().VoidPtrTy, 12047 Allocator->getExprLoc()); 12048 Args[2] = AllocVal; 12049 12050 CGF.EmitRuntimeCall(RTLFn, Args); 12051 } 12052 }; 12053 Address VDAddr = 12054 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); 12055 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12056 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12057 VDAddr, AA->getAllocator()); 12058 if (UntiedRealAddr.isValid()) 12059 if (auto *Region = 12060 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12061 Region->emitUntiedSwitch(CGF); 12062 return VDAddr; 12063 } 12064 return UntiedAddr; 12065 } 12066 12067 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12068 const VarDecl *VD) const { 12069 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12070 if (It == FunctionToUntiedTaskStackMap.end()) 12071 return false; 12072 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12073 } 12074 12075 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12076 CodeGenModule &CGM, const OMPLoopDirective &S) 12077 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12078 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12079 if (!NeedToPush) 12080 return; 12081 NontemporalDeclsSet &DS = 12082 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12083 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12084 for (const Stmt *Ref : C->private_refs()) { 12085 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12086 const ValueDecl *VD; 12087 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12088 VD = DRE->getDecl(); 12089 } else { 12090 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12091 assert((ME->isImplicitCXXThis() || 12092 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12093 "Expected member of current class."); 12094 VD = 
ME->getMemberDecl(); 12095 } 12096 DS.insert(VD); 12097 } 12098 } 12099 } 12100 12101 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12102 if (!NeedToPush) 12103 return; 12104 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12105 } 12106 12107 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12108 CodeGenFunction &CGF, 12109 const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, 12110 std::pair<Address, Address>> &LocalVars) 12111 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12112 if (!NeedToPush) 12113 return; 12114 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12115 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12116 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12117 } 12118 12119 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12120 if (!NeedToPush) 12121 return; 12122 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12123 } 12124 12125 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12126 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12127 12128 return llvm::any_of( 12129 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12130 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 12131 } 12132 12133 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12134 const OMPExecutableDirective &S, 12135 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12136 const { 12137 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12138 // Vars in target/task regions must be excluded completely. 
12139 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12140 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12141 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12142 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12143 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12144 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12145 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12146 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12147 } 12148 } 12149 // Exclude vars in private clauses. 12150 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12151 for (const Expr *Ref : C->varlists()) { 12152 if (!Ref->getType()->isScalarType()) 12153 continue; 12154 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12155 if (!DRE) 12156 continue; 12157 NeedToCheckForLPCs.insert(DRE->getDecl()); 12158 } 12159 } 12160 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12161 for (const Expr *Ref : C->varlists()) { 12162 if (!Ref->getType()->isScalarType()) 12163 continue; 12164 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12165 if (!DRE) 12166 continue; 12167 NeedToCheckForLPCs.insert(DRE->getDecl()); 12168 } 12169 } 12170 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12171 for (const Expr *Ref : C->varlists()) { 12172 if (!Ref->getType()->isScalarType()) 12173 continue; 12174 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12175 if (!DRE) 12176 continue; 12177 NeedToCheckForLPCs.insert(DRE->getDecl()); 12178 } 12179 } 12180 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12181 for (const Expr *Ref : C->varlists()) { 12182 if (!Ref->getType()->isScalarType()) 12183 continue; 12184 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12185 if (!DRE) 12186 continue; 12187 NeedToCheckForLPCs.insert(DRE->getDecl()); 12188 } 12189 } 12190 for (const auto *C : 
S.getClausesOfKind<OMPLinearClause>()) { 12191 for (const Expr *Ref : C->varlists()) { 12192 if (!Ref->getType()->isScalarType()) 12193 continue; 12194 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12195 if (!DRE) 12196 continue; 12197 NeedToCheckForLPCs.insert(DRE->getDecl()); 12198 } 12199 } 12200 for (const Decl *VD : NeedToCheckForLPCs) { 12201 for (const LastprivateConditionalData &Data : 12202 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12203 if (Data.DeclToUniqueName.count(VD) > 0) { 12204 if (!Data.Disabled) 12205 NeedToAddForLPCsAsDisabled.insert(VD); 12206 break; 12207 } 12208 } 12209 } 12210 } 12211 12212 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12213 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12214 : CGM(CGF.CGM), 12215 Action((CGM.getLangOpts().OpenMP >= 50 && 12216 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12217 [](const OMPLastprivateClause *C) { 12218 return C->getKind() == 12219 OMPC_LASTPRIVATE_conditional; 12220 })) 12221 ? 
ActionToDo::PushAsLastprivateConditional 12222 : ActionToDo::DoNotPush) { 12223 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12224 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12225 return; 12226 assert(Action == ActionToDo::PushAsLastprivateConditional && 12227 "Expected a push action."); 12228 LastprivateConditionalData &Data = 12229 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12230 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12231 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12232 continue; 12233 12234 for (const Expr *Ref : C->varlists()) { 12235 Data.DeclToUniqueName.insert(std::make_pair( 12236 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12237 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12238 } 12239 } 12240 Data.IVLVal = IVLVal; 12241 Data.Fn = CGF.CurFn; 12242 } 12243 12244 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12245 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12246 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12247 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12248 if (CGM.getLangOpts().OpenMP < 50) 12249 return; 12250 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12251 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12252 if (!NeedToAddForLPCsAsDisabled.empty()) { 12253 Action = ActionToDo::DisableLastprivateConditional; 12254 LastprivateConditionalData &Data = 12255 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12256 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12257 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12258 Data.Fn = CGF.CurFn; 12259 Data.Disabled = true; 12260 } 12261 } 12262 12263 CGOpenMPRuntime::LastprivateConditionalRAII 12264 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12265 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12266 return 
LastprivateConditionalRAII(CGF, S); 12267 } 12268 12269 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12270 if (CGM.getLangOpts().OpenMP < 50) 12271 return; 12272 if (Action == ActionToDo::DisableLastprivateConditional) { 12273 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12274 "Expected list of disabled private vars."); 12275 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12276 } 12277 if (Action == ActionToDo::PushAsLastprivateConditional) { 12278 assert( 12279 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12280 "Expected list of lastprivate conditional vars."); 12281 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12282 } 12283 } 12284 12285 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12286 const VarDecl *VD) { 12287 ASTContext &C = CGM.getContext(); 12288 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12289 if (I == LastprivateConditionalToTypes.end()) 12290 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12291 QualType NewType; 12292 const FieldDecl *VDField; 12293 const FieldDecl *FiredField; 12294 LValue BaseLVal; 12295 auto VI = I->getSecond().find(VD); 12296 if (VI == I->getSecond().end()) { 12297 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12298 RD->startDefinition(); 12299 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12300 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12301 RD->completeDefinition(); 12302 NewType = C.getRecordType(RD); 12303 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12304 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12305 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12306 } else { 12307 NewType = std::get<0>(VI->getSecond()); 12308 VDField = std::get<1>(VI->getSecond()); 12309 FiredField = std::get<2>(VI->getSecond()); 
12310 BaseLVal = std::get<3>(VI->getSecond()); 12311 } 12312 LValue FiredLVal = 12313 CGF.EmitLValueForField(BaseLVal, FiredField); 12314 CGF.EmitStoreOfScalar( 12315 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12316 FiredLVal); 12317 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12318 } 12319 12320 namespace { 12321 /// Checks if the lastprivate conditional variable is referenced in LHS. 12322 class LastprivateConditionalRefChecker final 12323 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12324 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12325 const Expr *FoundE = nullptr; 12326 const Decl *FoundD = nullptr; 12327 StringRef UniqueDeclName; 12328 LValue IVLVal; 12329 llvm::Function *FoundFn = nullptr; 12330 SourceLocation Loc; 12331 12332 public: 12333 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12334 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12335 llvm::reverse(LPM)) { 12336 auto It = D.DeclToUniqueName.find(E->getDecl()); 12337 if (It == D.DeclToUniqueName.end()) 12338 continue; 12339 if (D.Disabled) 12340 return false; 12341 FoundE = E; 12342 FoundD = E->getDecl()->getCanonicalDecl(); 12343 UniqueDeclName = It->second; 12344 IVLVal = D.IVLVal; 12345 FoundFn = D.Fn; 12346 break; 12347 } 12348 return FoundE == E; 12349 } 12350 bool VisitMemberExpr(const MemberExpr *E) { 12351 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12352 return false; 12353 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12354 llvm::reverse(LPM)) { 12355 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12356 if (It == D.DeclToUniqueName.end()) 12357 continue; 12358 if (D.Disabled) 12359 return false; 12360 FoundE = E; 12361 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12362 UniqueDeclName = It->second; 12363 IVLVal = D.IVLVal; 12364 FoundFn = D.Fn; 12365 break; 12366 } 12367 return FoundE == E; 12368 } 12369 bool VisitStmt(const Stmt *S) { 12370 for (const Stmt 
*Child : S->children()) { 12371 if (!Child) 12372 continue; 12373 if (const auto *E = dyn_cast<Expr>(Child)) 12374 if (!E->isGLValue()) 12375 continue; 12376 if (Visit(Child)) 12377 return true; 12378 } 12379 return false; 12380 } 12381 explicit LastprivateConditionalRefChecker( 12382 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12383 : LPM(LPM) {} 12384 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12385 getFoundData() const { 12386 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12387 } 12388 }; 12389 } // namespace 12390 12391 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12392 LValue IVLVal, 12393 StringRef UniqueDeclName, 12394 LValue LVal, 12395 SourceLocation Loc) { 12396 // Last updated loop counter for the lastprivate conditional var. 12397 // int<xx> last_iv = 0; 12398 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12399 llvm::Constant *LastIV = 12400 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12401 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12402 IVLVal.getAlignment().getAsAlign()); 12403 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12404 12405 // Last value of the lastprivate conditional. 12406 // decltype(priv_a) last_a; 12407 llvm::Constant *Last = getOrCreateInternalVariable( 12408 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12409 cast<llvm::GlobalVariable>(Last)->setAlignment( 12410 LVal.getAlignment().getAsAlign()); 12411 LValue LastLVal = 12412 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12413 12414 // Global loop counter. Required to handle inner parallel-for regions. 
12415 // iv 12416 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12417 12418 // #pragma omp critical(a) 12419 // if (last_iv <= iv) { 12420 // last_iv = iv; 12421 // last_a = priv_a; 12422 // } 12423 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12424 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12425 Action.Enter(CGF); 12426 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12427 // (last_iv <= iv) ? Check if the variable is updated and store new 12428 // value in global var. 12429 llvm::Value *CmpRes; 12430 if (IVLVal.getType()->isSignedIntegerType()) { 12431 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12432 } else { 12433 assert(IVLVal.getType()->isUnsignedIntegerType() && 12434 "Loop iteration variable must be integer."); 12435 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12436 } 12437 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12438 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12439 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12440 // { 12441 CGF.EmitBlock(ThenBB); 12442 12443 // last_iv = iv; 12444 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12445 12446 // last_a = priv_a; 12447 switch (CGF.getEvaluationKind(LVal.getType())) { 12448 case TEK_Scalar: { 12449 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12450 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12451 break; 12452 } 12453 case TEK_Complex: { 12454 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12455 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12456 break; 12457 } 12458 case TEK_Aggregate: 12459 llvm_unreachable( 12460 "Aggregates are not supported in lastprivate conditional."); 12461 } 12462 // } 12463 CGF.EmitBranch(ExitBB); 12464 // There is no need to emit line number for unconditional branch. 
12465 (void)ApplyDebugLocation::CreateEmpty(CGF); 12466 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12467 }; 12468 12469 if (CGM.getLangOpts().OpenMPSimd) { 12470 // Do not emit as a critical region as no parallel region could be emitted. 12471 RegionCodeGenTy ThenRCG(CodeGen); 12472 ThenRCG(CGF); 12473 } else { 12474 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12475 } 12476 } 12477 12478 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12479 const Expr *LHS) { 12480 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12481 return; 12482 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12483 if (!Checker.Visit(LHS)) 12484 return; 12485 const Expr *FoundE; 12486 const Decl *FoundD; 12487 StringRef UniqueDeclName; 12488 LValue IVLVal; 12489 llvm::Function *FoundFn; 12490 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12491 Checker.getFoundData(); 12492 if (FoundFn != CGF.CurFn) { 12493 // Special codegen for inner parallel regions. 
12494 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12495 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12496 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12497 "Lastprivate conditional is not found in outer region."); 12498 QualType StructTy = std::get<0>(It->getSecond()); 12499 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12500 LValue PrivLVal = CGF.EmitLValue(FoundE); 12501 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12502 PrivLVal.getAddress(CGF), 12503 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12504 LValue BaseLVal = 12505 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12506 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12507 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12508 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12509 FiredLVal, llvm::AtomicOrdering::Unordered, 12510 /*IsVolatile=*/true, /*isInit=*/false); 12511 return; 12512 } 12513 12514 // Private address of the lastprivate conditional in the current context. 
12515 // priv_a 12516 LValue LVal = CGF.EmitLValue(FoundE); 12517 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12518 FoundE->getExprLoc()); 12519 } 12520 12521 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12522 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12523 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12524 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12525 return; 12526 auto Range = llvm::reverse(LastprivateConditionalStack); 12527 auto It = llvm::find_if( 12528 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12529 if (It == Range.end() || It->Fn != CGF.CurFn) 12530 return; 12531 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12532 assert(LPCI != LastprivateConditionalToTypes.end() && 12533 "Lastprivates must be registered already."); 12534 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12535 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12536 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12537 for (const auto &Pair : It->DeclToUniqueName) { 12538 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12539 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12540 continue; 12541 auto I = LPCI->getSecond().find(Pair.first); 12542 assert(I != LPCI->getSecond().end() && 12543 "Lastprivate must be rehistered already."); 12544 // bool Cmp = priv_a.Fired != 0; 12545 LValue BaseLVal = std::get<3>(I->getSecond()); 12546 LValue FiredLVal = 12547 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12548 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12549 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12550 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12551 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12552 // if (Cmp) { 12553 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12554 CGF.EmitBlock(ThenBB); 
12555 Address Addr = CGF.GetAddrOfLocalVar(VD); 12556 LValue LVal; 12557 if (VD->getType()->isReferenceType()) 12558 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12559 AlignmentSource::Decl); 12560 else 12561 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12562 AlignmentSource::Decl); 12563 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12564 D.getBeginLoc()); 12565 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12566 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12567 // } 12568 } 12569 } 12570 12571 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12572 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12573 SourceLocation Loc) { 12574 if (CGF.getLangOpts().OpenMP < 50) 12575 return; 12576 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12577 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12578 "Unknown lastprivate conditional variable."); 12579 StringRef UniqueName = It->second; 12580 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12581 // The variable was not updated in the region - exit. 
12582 if (!GV) 12583 return; 12584 LValue LPLVal = CGF.MakeAddrLValue( 12585 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12586 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12587 CGF.EmitStoreOfScalar(Res, PrivLVal); 12588 } 12589 12590 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12591 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12592 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12593 llvm_unreachable("Not supported in SIMD-only mode"); 12594 } 12595 12596 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12597 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12598 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12599 llvm_unreachable("Not supported in SIMD-only mode"); 12600 } 12601 12602 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12603 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12604 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12605 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12606 bool Tied, unsigned &NumberOfParts) { 12607 llvm_unreachable("Not supported in SIMD-only mode"); 12608 } 12609 12610 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12611 SourceLocation Loc, 12612 llvm::Function *OutlinedFn, 12613 ArrayRef<llvm::Value *> CapturedVars, 12614 const Expr *IfCond) { 12615 llvm_unreachable("Not supported in SIMD-only mode"); 12616 } 12617 12618 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12619 CodeGenFunction &CGF, StringRef CriticalName, 12620 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12621 const Expr *Hint) { 12622 llvm_unreachable("Not supported in SIMD-only mode"); 12623 } 12624 12625 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12626 const RegionCodeGenTy &MasterOpGen, 12627 SourceLocation Loc) { 12628 llvm_unreachable("Not supported in SIMD-only mode"); 12629 } 12630 12631 void 
CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, 12632 const RegionCodeGenTy &MasterOpGen, 12633 SourceLocation Loc, 12634 const Expr *Filter) { 12635 llvm_unreachable("Not supported in SIMD-only mode"); 12636 } 12637 12638 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12639 SourceLocation Loc) { 12640 llvm_unreachable("Not supported in SIMD-only mode"); 12641 } 12642 12643 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12644 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12645 SourceLocation Loc) { 12646 llvm_unreachable("Not supported in SIMD-only mode"); 12647 } 12648 12649 void CGOpenMPSIMDRuntime::emitSingleRegion( 12650 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12651 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12652 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12653 ArrayRef<const Expr *> AssignmentOps) { 12654 llvm_unreachable("Not supported in SIMD-only mode"); 12655 } 12656 12657 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12658 const RegionCodeGenTy &OrderedOpGen, 12659 SourceLocation Loc, 12660 bool IsThreads) { 12661 llvm_unreachable("Not supported in SIMD-only mode"); 12662 } 12663 12664 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12665 SourceLocation Loc, 12666 OpenMPDirectiveKind Kind, 12667 bool EmitChecks, 12668 bool ForceSimpleCall) { 12669 llvm_unreachable("Not supported in SIMD-only mode"); 12670 } 12671 12672 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12673 CodeGenFunction &CGF, SourceLocation Loc, 12674 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12675 bool Ordered, const DispatchRTInput &DispatchValues) { 12676 llvm_unreachable("Not supported in SIMD-only mode"); 12677 } 12678 12679 void CGOpenMPSIMDRuntime::emitForStaticInit( 12680 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12681 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 
12682 llvm_unreachable("Not supported in SIMD-only mode"); 12683 } 12684 12685 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12686 CodeGenFunction &CGF, SourceLocation Loc, 12687 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12688 llvm_unreachable("Not supported in SIMD-only mode"); 12689 } 12690 12691 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12692 SourceLocation Loc, 12693 unsigned IVSize, 12694 bool IVSigned) { 12695 llvm_unreachable("Not supported in SIMD-only mode"); 12696 } 12697 12698 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12699 SourceLocation Loc, 12700 OpenMPDirectiveKind DKind) { 12701 llvm_unreachable("Not supported in SIMD-only mode"); 12702 } 12703 12704 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12705 SourceLocation Loc, 12706 unsigned IVSize, bool IVSigned, 12707 Address IL, Address LB, 12708 Address UB, Address ST) { 12709 llvm_unreachable("Not supported in SIMD-only mode"); 12710 } 12711 12712 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12713 llvm::Value *NumThreads, 12714 SourceLocation Loc) { 12715 llvm_unreachable("Not supported in SIMD-only mode"); 12716 } 12717 12718 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12719 ProcBindKind ProcBind, 12720 SourceLocation Loc) { 12721 llvm_unreachable("Not supported in SIMD-only mode"); 12722 } 12723 12724 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12725 const VarDecl *VD, 12726 Address VDAddr, 12727 SourceLocation Loc) { 12728 llvm_unreachable("Not supported in SIMD-only mode"); 12729 } 12730 12731 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12732 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12733 CodeGenFunction *CGF) { 12734 llvm_unreachable("Not supported in SIMD-only mode"); 12735 } 12736 12737 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 
12738 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12739 llvm_unreachable("Not supported in SIMD-only mode"); 12740 } 12741 12742 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12743 ArrayRef<const Expr *> Vars, 12744 SourceLocation Loc, 12745 llvm::AtomicOrdering AO) { 12746 llvm_unreachable("Not supported in SIMD-only mode"); 12747 } 12748 12749 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12750 const OMPExecutableDirective &D, 12751 llvm::Function *TaskFunction, 12752 QualType SharedsTy, Address Shareds, 12753 const Expr *IfCond, 12754 const OMPTaskDataTy &Data) { 12755 llvm_unreachable("Not supported in SIMD-only mode"); 12756 } 12757 12758 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12759 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12760 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12761 const Expr *IfCond, const OMPTaskDataTy &Data) { 12762 llvm_unreachable("Not supported in SIMD-only mode"); 12763 } 12764 12765 void CGOpenMPSIMDRuntime::emitReduction( 12766 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12767 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12768 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12769 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12770 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12771 ReductionOps, Options); 12772 } 12773 12774 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12775 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12776 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12777 llvm_unreachable("Not supported in SIMD-only mode"); 12778 } 12779 12780 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12781 SourceLocation Loc, 12782 bool IsWorksharingReduction) { 12783 llvm_unreachable("Not supported in SIMD-only mode"); 12784 } 

// Continuation of the CGOpenMPSIMDRuntime stub overrides: hooks that would
// emit OpenMP runtime calls trap with llvm_unreachable, since no runtime
// library is used in SIMD-only mode.

// Task reduction bookkeeping fix-ups: not supported.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Address of a task-reduction item: not supported.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'taskwait' directive lowering: not supported.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'cancellation point' directive lowering: not supported.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'cancel' directive lowering: not supported.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Outlining of a 'target' region: not supported.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'target' region invocation (offload launch): not supported.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Device-side emission of target functions: not supported.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Device-side emission of target global variables: not supported.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Unlike its siblings this is a query, not an emitter: returning false
// signals that this runtime did not claim emission of GD.
// NOTE(review): presumably this lets normal (host) emission proceed — confirm
// against the callers in CodeGenModule.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// 'teams' region invocation: not supported.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'num_teams'/'thread_limit' clause lowering: not supported.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// 'target data' region mapping calls: not supported.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Standalone 'target enter/exit data'/'target update' calls: not supported.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Doacross ('ordered' with 'depend') initialization: not supported.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Doacross dependence wait/post: not supported.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Mapping of a captured-field parameter to its target-side counterpart: not
// supported.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Address of a translated parameter inside an outlined target function: not
// supported.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}